Colinux base Index: linux-2.6.25-source/CREDITS =================================================================== --- linux-2.6.25-source.orig/CREDITS +++ linux-2.6.25-source/CREDITS @@ -16,6 +16,13 @@ S: (ask for current address) S: Finland +N: Dan Aloni +E: da-x@colinux.org +W: http://www.colinux.org +D: Cooperative Linux +D: Various kernel patches +S: Israel + N: Dragos Acostachioaie E: dragos@iname.com W: http://www.arbornet.org/~dragos Index: linux-2.6.25-source/Makefile =================================================================== --- linux-2.6.25-source.orig/Makefile +++ linux-2.6.25-source/Makefile @@ -299,7 +299,11 @@ AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +ifeq ($(GCCTRACE),Y) +CC = $(COLINUX_ROOT)/bin/tracewrapper.py $(CROSS_COMPILE)gcc +else CC = $(CROSS_COMPILE)gcc +endif CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm Index: linux-2.6.25-source/arch/x86/Kconfig =================================================================== --- linux-2.6.25-source.orig/arch/x86/Kconfig +++ linux-2.6.25-source/arch/x86/Kconfig @@ -70,6 +70,7 @@ config ZONE_DMA def_bool y + depends on !COOPERATIVE config SBUS bool @@ -181,7 +182,7 @@ config X86_BIOS_REBOOT bool - depends on X86_32 && !(X86_VISWS || X86_VOYAGER) + depends on X86_32 && !(X86_VISWS || X86_VOYAGER || COOPERATIVE) default y config X86_TRAMPOLINE @@ -199,6 +200,7 @@ config SMP bool "Symmetric multi-processing support" + depends on !COOPERATIVE ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. 
If @@ -412,6 +414,7 @@ config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 + depends on !COOPERATIVE help Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -530,7 +533,7 @@ config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) + depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH || COOPERATIVE) help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -553,6 +556,11 @@ to use it. If you say Y here even though your machine doesn't have an IO-APIC, then the kernel will still run with no slowdown at all. +config X86_UP_COPIC + bool 'Cooperative PIC (COPIC) support' + depends on COOPERATIVE + default y + config X86_LOCAL_APIC def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) @@ -646,6 +654,7 @@ config I8K tristate "Dell laptop support" + depends on !COOPERATIVE ---help--- This adds a driver to safely access the System Management Mode of the CPU on the Dell Inspiron 8000. The System Management Mode @@ -667,7 +676,7 @@ config X86_REBOOTFIXUPS def_bool n prompt "Enable X86 board specific fixups for reboot" - depends on X86_32 && X86 + depends on X86_32 && X86 && !COOPERATIVE ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -765,14 +774,14 @@ config HIGHMEM4G bool "4GB" - depends on !X86_NUMAQ + depends on !X86_NUMAQ && !COOPERATIVE help Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. 
config HIGHMEM64G bool "64GB" - depends on !M386 && !M486 + depends on !M386 && !M486 && !COOPERATIVE select X86_PAE help Select this if you have a 32-bit processor and more than 4 @@ -831,7 +840,7 @@ config X86_PAE def_bool n prompt "PAE (Physical Address Extension) Support" - depends on X86_32 && !HIGHMEM4G + depends on X86_32 && !HIGHMEM4G && !COOPERATIVE select RESOURCES_64BIT help PAE is required for NX support, and furthermore enables @@ -930,7 +939,7 @@ config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC && !COOPERATIVE) select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -981,6 +990,7 @@ config MTRR bool "MTRR (Memory Type Range Register) support" + depends on !COOPERATIVE ---help--- On Intel P6 family processors (Pentium Pro, Pentium II and later) the Memory Type Range Registers (MTRRs) may be used to control @@ -1081,6 +1091,7 @@ config KEXEC bool "kexec system call" + depends on !COOPERATIVE help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -1205,6 +1216,17 @@ Say N if you want to disable CPU hotplug and don't need to suspend. +config COOPERATIVE + bool 'Cooperative Mode' + default y + +config COLINUX_STATS + bool 'Cooperative Linux stats' + depends on COOPERATIVE + default y + help + OS switch counters readable in /proc/colinux/stats. 
+ config COMPAT_VDSO def_bool y prompt "Compat VDSO support" @@ -1229,7 +1251,7 @@ depends on NUMA menu "Power management options" - depends on !X86_VOYAGER + depends on !X86_VOYAGER && !COOPERATIVE config ARCH_HIBERNATION_HEADER def_bool y Index: linux-2.6.25-source/arch/x86/Kconfig.cpu =================================================================== --- linux-2.6.25-source.orig/arch/x86/Kconfig.cpu +++ linux-2.6.25-source/arch/x86/Kconfig.cpu @@ -392,7 +392,7 @@ config X86_TSC def_bool y - depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 + depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ && !COOPERATIVE) || X86_64 # this should be set for all -march=.. options where the compiler # generates cmov. 
Index: linux-2.6.25-source/arch/x86/kernel/Makefile =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/Makefile +++ linux-2.6.25-source/arch/x86/kernel/Makefile @@ -7,6 +7,10 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) +ifdef CONFIG_COOPERATIVE +COLINUX_SUFFIX := _cooperative +endif + # # vsyscalls (which work on the user stack) should have # no stack-protector checks: @@ -18,16 +22,18 @@ obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o -obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o +obj-y += ldt.o +obj-y += setup_$(BITS).o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o +obj-y += pci-dma_$(BITS).o bootflag.o obj-y += quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o +obj-y += alternative.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o -obj-y += tsc_$(BITS).o io_delay.o rtc.o +ifndef CONFIG_COOPERATIVE +obj-y += io_delay.o rtc.o +endif obj-y += i387.o obj-y += ptrace.o @@ -40,11 +46,14 @@ obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_X86_64) += reboot.o +obj-$(CONFIG_COOPERATIVE) += reboot_cooperative.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +ifndef CONFIG_COOPERATIVE obj-$(CONFIG_PCI) += early-quirks.o +endif apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o @@ -97,3 +106,14 @@ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o endif + +obj-y += ioport$(COLINUX_SUFFIX).o +ifdef CONFIG_COOPERATIVE +obj-y += cooperative.o +obj-y += timer_cooperative.o +obj-y += i8259_cooperative.o +else +obj-y += 
i8253.o i8259_$(BITS).o +obj-y += time_$(BITS).o +obj-y += tsc_$(BITS).o e820_$(BITS).o +endif Index: linux-2.6.25-source/arch/x86/kernel/cooperative.c =================================================================== --- /dev/null +++ linux-2.6.25-source/arch/x86/kernel/cooperative.c @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +CO_TRACE_STOP; + +int co_passage_page_holding_count = 0; +bool co_host_fpu_saved = 0; +char co_host_fpu[0x200] __attribute__ ((__aligned__(16))); + +static void __init co_early_cpu_init(void) +{ + /* + * On the first switch to Linux we must set up a valid TR because + * the passage page code assumes such one exists. This is basically + * copied code from cpu_init(). + * + * P.S this is protected by CO_TRACE_STOP so that we don't + * have a monitor context switch. + */ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + load_idt(&idt_descr); + switch_to_new_gdt(); /* Sets GDT and %fs */ + + /* + * Delete NT + */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + set_tss_desc(cpu,t); + gdt[GDT_ENTRY_TSS].b &= 0xfffffdff; + + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + gdt[GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; +#endif + + /* Clear %gs. 
*/ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + write_cr4(mmu_cr4_features); +} + +/* + * 'co_start' is the first Linux code that runs in the + * coLinux kernel context. It receives %ecx which contains the + * address of the passage page. The passage page code sets %ecx + * to this value in its context restore part. + */ + +void __init co_start(void) +{ + co_early_cpu_init(); + co_start_kernel(); +} + +static void co_switch_wrapper_protected(void) +{ + kernel_fpu_begin(); + + if (co_host_fpu_saved) { + CO_FPU_RESTORE(co_host_fpu); + co_host_fpu_saved = 0; + } + + /* And switch... */ + co_switch(); + + kernel_fpu_end(); +} + +void co_switch_wrapper(void) +{ + /* taken from irq.c: debugging check for stack overflow */ + long esp; + + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); + if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + printk("co_switch_wrapper: stack overflow: %ld\n", esp - sizeof(struct thread_info)); + co_terminate(CO_TERMINATE_STACK_OVERFLOW); + } + +#ifdef CONFIG_COLINUX_STATS + co_proc_counts.switches[co_passage_page->operation]++; +#endif + co_switch_wrapper_protected(); +} + +void co_debug(const char *fmt, ...) 
+{ +} + +#define MAX_TRACE_POINTS 1024 + +typedef struct { + unsigned char *code; + unsigned char original_byte; + int off; +} co_tracepoint_t; + +co_tracepoint_t tracepoints[MAX_TRACE_POINTS]; +static int active_tracepoints = 0; + +void co_kernel_breakpoint(struct pt_regs * regs) +{ + int i = 0; + unsigned char *code = (unsigned char *)regs->ip; + if (!code) + return; + + code--; + for (i=0; i < active_tracepoints; i++) { + if (tracepoints[i].code == code) { + co_debug("TRACEPOINT: %p", code); + break; + } + } + + if (i == active_tracepoints) { + /* Bad, we don't know this tracepoint */ + co_terminate(CO_TERMINATE_INVALID_OPERATION); + return; + } + + *tracepoints[i].code = tracepoints[i].original_byte; + regs->flags |= (1 << 8); /* Enable TF */ + regs->ip = (unsigned long)code; + tracepoints[i].off = 1; +} + +void co_kernel_set_breakpoints(void) +{ + int i; + + for (i=0; i < active_tracepoints; i++) + if (tracepoints[i].code && tracepoints[i].off) { + *tracepoints[i].code = 0xcc; + tracepoints[i].off = 0; + } +} + +int co_kernel_debug(struct pt_regs *regs, long error_code, unsigned int condition) +{ + /* if not a single step trap */ + if (!(condition & DR_STEP)) + return 0; + + /* if userspace */ + if (regs->cs & 3) + return 0; + + regs->flags &= ~(1 << 8); /* Disable TF */ + + co_kernel_set_breakpoints(); + + return 1; +} + +void co_kernel_tracepoint_add(unsigned char *code) +{ + if (active_tracepoints >= MAX_TRACE_POINTS) + return; + + tracepoints[active_tracepoints].code = code; + tracepoints[active_tracepoints].original_byte = *code; + tracepoints[active_tracepoints].off = 0; + active_tracepoints++; + *code = 0xcc; +} + +co_arch_info_t co_arch_info = { + .kernel_cs = __KERNEL_CS, + .kernel_ds = __KERNEL_DS, +}; + +CO_TRACE_CONTINUE; Index: linux-2.6.25-source/arch/x86/kernel/entry_32.S =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/entry_32.S +++ linux-2.6.25-source/arch/x86/kernel/entry_32.S 
@@ -619,7 +619,11 @@ SAVE_ALL TRACE_IRQS_OFF movl %esp,%eax +#ifdef CONFIG_COOPERATIVE + call proxy_interrupt_handler +#else call do_IRQ +#endif jmp ret_from_intr ENDPROC(common_interrupt) CFI_ENDPROC @@ -727,6 +731,9 @@ testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate preempt_stop(CLBR_ANY) +#if defined(CONFIG_COOPERATIVE) && !defined(CONFIG_PREEMPT) + DISABLE_INTERRUPTS(CLBR_ANY) +#endif call math_state_restore jmp ret_from_exception device_not_available_emulate: Index: linux-2.6.25-source/arch/x86/kernel/head_32.S =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/head_32.S +++ linux-2.6.25-source/arch/x86/kernel/head_32.S @@ -360,6 +360,7 @@ */ call setup_idt +ENTRY(co_arch_start_kernel) checkCPUtype: movl $-1,X86_CPUID # -1 for no CPUID initially @@ -555,6 +556,10 @@ #endif call dump_stack hlt_loop: +#ifdef CONFIG_COOPERATIVE + movl $2,%eax /* CO_TERMINATE_PANIC */ + call co_terminate +#endif hlt jmp hlt_loop @@ -645,7 +650,7 @@ .data ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-100 .long __BOOT_DS ready: .byte 0 Index: linux-2.6.25-source/arch/x86/kernel/process_32.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/process_32.c +++ linux-2.6.25-source/arch/x86/kernel/process_32.c @@ -51,6 +51,7 @@ #endif #include +#include #include #include @@ -196,6 +197,9 @@ if (rcu_pending(cpu)) rcu_check_callbacks(cpu, 0); + if (cooperative_mode_enabled()) + idle = co_idle_processor; + if (!idle) idle = default_idle; @@ -226,6 +230,8 @@ */ void cpu_idle_wait(void) { + if (cooperative_mode_enabled()) + co_terminate(CO_TERMINATE_INVALID_OPERATION); smp_mb(); /* kick all the CPUs so that they exit out of pm_idle */ smp_call_function(do_nothing, NULL, 0, 1); Index: linux-2.6.25-source/arch/x86/kernel/setup_32.c 
=================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/setup_32.c +++ linux-2.6.25-source/arch/x86/kernel/setup_32.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +#ifndef CONFIG_COOPERATIVE static struct resource video_ram_resource = { .name = "Video RAM area", .start = 0xa0000, @@ -147,6 +149,7 @@ .end = 0x00ff, .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; +#endif /* !CONFIG_COOPERATIVE */ /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -171,6 +174,9 @@ /* user-defined highmem size */ static unsigned int highmem_pages = -1; +/* coLinux: kernel virtual address of min_low_pfn (start of the bootmem area) */ +static unsigned long start_va = 0; + /* * Setup options */ @@ -229,6 +235,7 @@ int __initdata user_defined_memmap = 0; +#ifndef CONFIG_COOPERATIVE /* * "mem=nopentium" disables the 4MB page tables. * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM @@ -262,6 +269,7 @@ return 0; } early_param("mem", parse_mem); +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -385,6 +393,7 @@ return max_low_pfn; } +#ifndef CONFIG_COOPERATIVE /* * workaround for Dell systems that neglect to reserve EBDA */ @@ -395,6 +404,7 @@ if (addr) reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT); } +#endif /* !CONFIG_COOPERATIVE */ #ifndef CONFIG_NEED_MULTIPLE_NODES void __init setup_bootmem_allocator(void); @@ -406,7 +416,14 @@ */ min_low_pfn = PFN_UP(init_pg_tables_end); +#ifdef CONFIG_COOPERATIVE + max_low_pfn = max_pfn = co_boot_params.co_memory_size / PAGE_SIZE; + min_low_pfn = PFN_UP(__pa((unsigned long)&_end)) + 0x10; + start_va = (unsigned long)__va(min_low_pfn << PAGE_SHIFT); + co_alloc_pages(start_va, 0x20); +#else /* CONFIG_COOPERATIVE */ max_low_pfn = find_max_low_pfn(); +#endif /* CONFIG_COOPERATIVE */ #ifdef 
CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -436,8 +453,10 @@ { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifndef CONFIG_COOPERATIVE max_zone_pfns[ZONE_DMA] = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; +#endif /* !CONFIG_COOPERATIVE */ max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; @@ -509,6 +528,16 @@ static void __init reserve_initrd(void) { +#ifdef CONFIG_COOPERATIVE + if (co_boot_params.co_initrd != NULL) { + initrd_start = (unsigned long)co_boot_params.co_initrd; + initrd_end = (unsigned long)co_boot_params.co_initrd + co_boot_params.co_initrd_size; + printk(KERN_INFO "initrd enabled: 0x%x-0x%x size: 0x%08lx\n", + (unsigned int)initrd_start, (unsigned int)initrd_end, (long unsigned int)co_boot_params.co_initrd_size); + + reserve_bootmem(virt_to_phys(co_boot_params.co_initrd), co_boot_params.co_initrd_size, BOOTMEM_DEFAULT); + } +#else /* CONFIG_COOPERATIVE */ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; unsigned long ramdisk_end = ramdisk_image + ramdisk_size; @@ -549,6 +578,7 @@ initrd_end = initrd_start + ramdisk_size; do_relocate_initrd = true; +#endif /* CONFIG_COOPERATIVE */ } #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) @@ -605,6 +635,15 @@ */ bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); +#ifdef CONFIG_COOPERATIVE + { + unsigned long bootmem_end = + start_va + bootmap_size + (0x10 << PAGE_SHIFT); + unsigned long physical_end = + __PAGE_OFFSET + (max_low_pfn << PAGE_SHIFT); + free_bootmem(__pa(bootmem_end), physical_end - bootmem_end); + } +#else /* CONFIG_COOPERATIVE */ register_bootmem_low_pages(max_low_pfn); /* @@ -632,6 +671,7 @@ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 == 6) reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT); +#endif /* CONFIG_COOPERATIVE */ #ifdef CONFIG_SMP /* @@ -687,11 +727,13 
@@ static void set_mca_bus(int x) { } #endif +#ifndef CONFIG_COOPERATIVE /* Overridden in paravirt.c if CONFIG_PARAVIRT */ char * __init __attribute__((weak)) memory_setup(void) { return machine_specific_memory_setup(); } +#endif /* * Determine if we were loaded by an EFI loader. If so, then we have also been @@ -736,8 +778,10 @@ #endif ARCH_SETUP +#ifndef CONFIG_COOPERATIVE printk(KERN_INFO "BIOS-provided physical RAM map:\n"); print_memory_map(memory_setup()); +#endif /* !CONFIG_COOPERATIVE */ copy_edd(); @@ -755,12 +799,17 @@ bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_COOPERATIVE + strlcpy(boot_command_line, co_boot_params.co_boot_parameters, COMMAND_LINE_SIZE); +#endif parse_early_param(); +#ifndef CONFIG_COOPERATIVE if (user_defined_memmap) { printk(KERN_INFO "user-defined physical RAM map:\n"); print_memory_map("user"); } +#endif /* !CONFIG_COOPERATIVE */ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -768,11 +817,13 @@ if (efi_enabled) efi_init(); +#ifndef CONFIG_COOPERATIVE /* update e820 for memory not covered by WB MTRRs */ find_max_pfn(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) find_max_pfn(); +#endif /* !CONFIG_COOPERATIVE */ max_low_pfn = setup_memory(); @@ -822,7 +873,9 @@ paravirt_post_allocator_init(); +#ifndef CONFIG_COOPERATIVE dmi_scan_machine(); +#endif io_delay_init(); @@ -854,11 +907,15 @@ get_smp_config(); #endif +#ifndef CONFIG_COOPERATIVE e820_register_memory(); e820_mark_nosave_regions(); +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) +#ifdef CONFIG_COOPERATIVE_CONSOLE + conswitchp = &colinux_con; +#elif defined(CONFIG_VGA_CONSOLE) if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con; #elif defined(CONFIG_DUMMY_CONSOLE) @@ -867,6 +924,7 @@ #endif } +#ifndef CONFIG_COOPERATIVE /* * Request address space for all standard resources * @@ 
-889,3 +947,4 @@ } subsys_initcall(request_standard_resources); +#endif /* !CONFIG_COOPERATIVE */ Index: linux-2.6.25-source/arch/x86/kernel/traps_32.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/traps_32.c +++ linux-2.6.25-source/arch/x86/kernel/traps_32.c @@ -58,6 +58,8 @@ #include #include +#include +#include #include "mach_traps.h" @@ -516,6 +518,12 @@ } kernel_trap: { + if (cooperative_mode_enabled()) { + if (trapnr == 3) { + co_kernel_breakpoint(regs); + return; + } + } if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -878,6 +886,10 @@ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; + if (cooperative_mode_enabled() && + co_kernel_debug(regs, error_code, condition)) + return; + if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; @@ -1116,6 +1128,12 @@ struct task_struct *tsk = thread->task; clts(); /* Allow maths ops (or we recurse) */ +#ifdef CONFIG_COOPERATIVE + if (!co_host_fpu_saved) { + CO_FPU_SAVE(co_host_fpu); + co_host_fpu_saved = 1; + } +#endif /* CONFIG_COOPERATIVE */ if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); Index: linux-2.6.25-source/arch/x86/mm/fault.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/mm/fault.c +++ linux-2.6.25-source/arch/x86/mm/fault.c @@ -217,6 +217,7 @@ page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; + page = CO_P_TO_PP(page); printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); } @@ -497,6 +498,7 @@ unsigned long pgd_paddr; pmd_t *pmd_k; pte_t *pte_k; + /* * Synchronize this task's top level page-table * with the 'reference' page table. 
@@ -518,10 +520,6 @@ pmd_t *pmd, *pmd_ref; pte_t *pte, *pte_ref; - /* Make sure we are in vmalloc area */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - /* Copy kernel mappings over when needed. This can also happen within a race in page table update. In the later case just flush. */ @@ -620,6 +618,10 @@ #else if (unlikely(address >= TASK_SIZE64)) { #endif + /* Make sure we are in vmalloc area */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + goto bad_area_nosemaphore; + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; Index: linux-2.6.25-source/arch/x86/mm/init_32.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/mm/init_32.c +++ linux-2.6.25-source/arch/x86/mm/init_32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,9 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; +#ifndef CONFIG_COOPERATIVE static noinline int do_test_wp_bit(void); +#endif /* * Creates a middle page table and puts a pointer to it in the @@ -100,7 +103,7 @@ } paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(page_table)) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -142,6 +145,7 @@ } } +#ifndef CONFIG_COOPERATIVE static inline int is_kernel_text(unsigned long addr) { if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) @@ -218,6 +222,7 @@ return 1; return 0; } +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -330,6 +335,7 @@ void __init native_pagetable_setup_start(pgd_t *base) { +#ifndef CONFIG_COOPERATIVE unsigned long pfn, va; pgd_t *pgd; pud_t *pud; @@ -358,6 +364,7 @@ pte_clear(NULL, va, pte); } paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); +#endif /* CONFIG_COOPERATIVE */ } void __init 
native_pagetable_setup_done(pgd_t *base) @@ -390,6 +397,7 @@ paravirt_pagetable_setup_start(pgd_base); +#ifndef CONFIG_COOPERATIVE /* Enable PSE if available */ if (cpu_has_pse) set_in_cr4(X86_CR4_PSE); @@ -403,6 +411,7 @@ kernel_physical_mapping_init(pgd_base); remap_numa_kva(); +#endif /* !CONFIG_COOPERATIVE */ /* * Fixed mappings, only the page table structure has to be @@ -543,6 +552,7 @@ * used to involve black magic jumps to work around some nasty CPU bugs, * but fortunately the switch to using exceptions got rid of all that. */ +#ifndef CONFIG_COOPERATIVE static void __init test_wp_bit(void) { printk(KERN_INFO @@ -563,6 +573,7 @@ printk(KERN_CONT "Ok.\n"); } } +#endif /* !CONFIG_COOPERATIVE */ static struct kcore_list kcore_mem, kcore_vmalloc; @@ -621,15 +632,15 @@ #if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif +#ifdef CONFIG_COOPERATIVE + " colinux : 0x%08lx - 0x%08lx (%4ld MB)\n" #endif - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n", + FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, @@ -638,8 +649,19 @@ (LAST_PKMAP*PAGE_SIZE) >> 10, #endif +#ifdef CONFIG_COOPERATIVE + CO_VPTR_BASE_START, CO_VPTR_BASE_END, + (CO_VPTR_BASE_END - CO_VPTR_BASE_START) >> 20, +#endif + VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, + (VMALLOC_END - VMALLOC_START) >> 20); + + printk(KERN_INFO + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", (unsigned long)__va(0), 
(unsigned long)high_memory, ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, @@ -658,12 +680,20 @@ BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); BUG_ON(VMALLOC_END > PKMAP_BASE); #endif +#ifdef CONFIG_COOPERATIVE + BUG_ON(CO_VPTR_BASE_END > FIXADDR_START); + BUG_ON(VMALLOC_END > CO_VPTR_BASE_START); + if (VMALLOC_START > VMALLOC_END) + panic("LOWMEM overlaps vmalloc. Decrease total memory with 'mem=...'!"); +#endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); #endif /* double-sanity-check paranoia */ +#ifndef CONFIG_COOPERATIVE if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); +#endif cpa_init(); @@ -694,6 +724,7 @@ * This function cannot be __init, since exceptions don't work in that * section. Put this after the callers, so that it cannot be inlined. */ +#ifndef CONFIG_COOPERATIVE static noinline int do_test_wp_bit(void) { char tmp_reg; @@ -713,6 +744,7 @@ return flag; } +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; @@ -770,7 +802,9 @@ */ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", begin, PAGE_ALIGN(end)); +#ifndef CONFIG_COOPERATIVE set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +#endif /* !CONFIG_COOPERATIVE */ #else unsigned long addr; @@ -779,7 +813,9 @@ * we are going to free part of that, we need to make that * writeable first. 
*/ +#ifndef CONFIG_COOPERATIVE set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); +#endif /* !CONFIG_COOPERATIVE */ for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); Index: linux-2.6.25-source/arch/x86/mm/ioremap.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/mm/ioremap.c +++ linux-2.6.25-source/arch/x86/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ int page_is_ram(unsigned long pagenr) { +#ifndef CONFIG_COOPERATIVE resource_size_t addr, end; int i; @@ -71,6 +73,7 @@ if ((pagenr >= addr) && (pagenr < end)) return 1; } +#endif /* !CONFIG_COOPERATIVE */ return 0; } @@ -200,12 +203,22 @@ */ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { + if (cooperative_mode_enabled()) { + panic("ioremap_nocache %llu:%lu\n", (unsigned long long)phys_addr, size); + return NULL; + } + return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); } EXPORT_SYMBOL(ioremap_nocache); void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { + if (cooperative_mode_enabled()) { + panic("ioremap_cache %llu:%lu\n", (unsigned long long)phys_addr, size); + return NULL; + } + return __ioremap(phys_addr, size, IOR_MODE_CACHED); } EXPORT_SYMBOL(ioremap_cache); Index: linux-2.6.25-source/include/asm-x86/bug.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/bug.h +++ linux-2.6.25-source/include/asm-x86/bug.h @@ -4,6 +4,10 @@ #ifdef CONFIG_BUG #define HAVE_ARCH_BUG +#ifdef CONFIG_COOPERATIVE +extern void co_terminate_bug(int code, int line, const char *file); +#define BUG() do { co_terminate_bug(0, __LINE__, __FILE__); } while(0) +#else /* CONFIG_COOPERATIVE */ #ifdef CONFIG_DEBUG_BUGVERBOSE #ifdef CONFIG_X86_32 @@ -34,6 +38,7 @@ #endif #endif /* !CONFIG_BUG */ +#endif /* CONFIG_COOPERATIVE */ #include #endif Index: linux-2.6.25-source/include/asm-x86/cooperative.h 
=================================================================== --- /dev/null +++ linux-2.6.25-source/include/asm-x86/cooperative.h @@ -0,0 +1,221 @@ +/* + * linux/include/asm/cooperative.h + * + * Copyright (C) 2004 Dan Aloni + * + * This file defines the lower level interfaces between the Cooperative Linux + * kernel and the host OS driver. It's for both external inclusion from the + * host OS driver and internal inclusion in the kernel sources. + */ + +#ifndef __LINUX_ASM_COOPERATIVE_H__ +#define __LINUX_ASM_COOPERATIVE_H__ + +typedef struct { + unsigned short size; + struct x86_idt_entry *table; +} __attribute__((packed)) x86_idt_t; + +typedef struct { + unsigned short limit; + struct x86_dt_entry *base; +} __attribute__((packed)) x86_gdt_t; + +typedef struct { + unsigned char border2[0x4]; + + unsigned long cs; + #define CO_ARCH_STATE_STACK_CS "0x04" + + unsigned long ds; + #define CO_ARCH_STATE_STACK_DS "0x08" + + unsigned long es; + #define CO_ARCH_STATE_STACK_ES "0x0C" + + unsigned long cr3; + #define CO_ARCH_STATE_STACK_CR3 "0x10" + + unsigned long cr4; + #define CO_ARCH_STATE_STACK_CR4 "0x14" + + unsigned long cr2; + #define CO_ARCH_STATE_STACK_CR2 "0x18" + + unsigned long cr0; + #define CO_ARCH_STATE_STACK_CR0 "0x1C" + + x86_gdt_t gdt; + #define CO_ARCH_STATE_STACK_GDT "0x20" + + unsigned long fs; + #define CO_ARCH_STATE_STACK_FS "0x26" + + unsigned long gs; + #define CO_ARCH_STATE_STACK_GS "0x2A" + + unsigned short ldt; + #define CO_ARCH_STATE_STACK_LDT "0x2E" + + x86_idt_t idt; + #define CO_ARCH_STATE_STACK_IDT "0x30" + + unsigned short tr; + #define CO_ARCH_STATE_STACK_TR "0x36" + + unsigned long return_eip; + #define CO_ARCH_STATE_STACK_RETURN_EIP "0x38" + + unsigned long flags; + #define CO_ARCH_STATE_STACK_FLAGS "0x3C" + + unsigned long esp; + #define CO_ARCH_STATE_STACK_ESP "0x40" + + unsigned long ss; + #define CO_ARCH_STATE_STACK_SS "0x44" + + unsigned long dr0; + #define CO_ARCH_STATE_STACK_DR0 "0x48" + + unsigned long dr1; + #define 
CO_ARCH_STATE_STACK_DR1 "0x4C" + + unsigned long dr2; + #define CO_ARCH_STATE_STACK_DR2 "0x50" + + unsigned long dr3; + #define CO_ARCH_STATE_STACK_DR3 "0x54" + + unsigned long dr6; + #define CO_ARCH_STATE_STACK_DR6 "0x58" + + unsigned long dr7; + #define CO_ARCH_STATE_STACK_DR7 "0x5C" + + union { + unsigned long temp_cr3; + unsigned long other_map; + } __attribute__((packed)); + #define CO_ARCH_STATE_STACK_TEMP_CR3 "0x60" + #define CO_ARCH_STATE_STACK_OTHERMAP "0x60" + + unsigned long relocate_eip; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP "0x64" + + unsigned long pad1; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP_AFTER "0x68" + + unsigned long va; + #define CO_ARCH_STATE_STACK_VA "0x6C" + + unsigned long sysenter_cs; + #define CO_ARCH_STATE_SYSENTER_CS "0x70" + + unsigned long sysenter_esp; + #define CO_ARCH_STATE_SYSENTER_ESP "0x74" + + unsigned long sysenter_eip; + #define CO_ARCH_STATE_SYSENTER_EIP "0x78" +} __attribute__((packed)) co_arch_state_stack_t; + +#define CO_MAX_PARAM_SIZE 0x400 + +typedef struct co_arch_passage_page_normal_address_space { + unsigned long pgd[0x400]; + unsigned long pte[2][0x400]; +} co_arch_passage_page_normal_address_space_t; + +typedef struct co_arch_passage_page_pae_address_space { + unsigned long long main[0x200]; + unsigned long long pgd[2][0x200]; + unsigned long long pte[2][0x200]; +} co_arch_passage_page_pae_address_space_t; + +typedef struct co_arch_passage_page { + union { + struct { + union { + struct { + union { + unsigned long self_physical_address; + unsigned long temp_pgd_physical; + } __attribute__((packed)); + unsigned long dr0; + unsigned long dr1; + unsigned long dr2; + unsigned long dr3; + unsigned long dr6; + unsigned long dr7; + unsigned char code[0x260]; + } __attribute__((packed)); + unsigned char pad[0x280]; /* Be careful! 
see NOTE below */ + } __attribute__((packed)); + + /* Machine states */ + + /* + * NOTE: *_state fields must be aligned on a 16-byte boundary since + * the fxsave/fxrstor instructions expect an aligned argument. + */ + + co_arch_state_stack_t host_state; + co_arch_state_stack_t linuxvm_state; + + /* Control parameters */ + unsigned long operation; + unsigned long params[]; + } __attribute__((packed)); + unsigned char first_page[0x1000]; + }; + + /* page tables for passage address spaces */ + union { + co_arch_passage_page_normal_address_space_t guest_normal; + co_arch_passage_page_normal_address_space_t temp_space; + } __attribute__((packed)); + union { + co_arch_passage_page_normal_address_space_t host_normal; + co_arch_passage_page_pae_address_space_t host_pae; + } __attribute__((packed)); +} co_arch_passage_page_t; + +/* + * Address space layout: + */ + +#define CO_VPTR_BASE (0xffc00000ul) +#define CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP (CO_VPTR_BASE - 0x1000000ul) +#define CO_VPTR_PSEUDO_RAM_PAGE_TABLES (CO_VPTR_BASE - 0x1100000ul) +#define CO_VPTR_PASSAGE_PAGE (CO_VPTR_BASE - 0x1101000ul) +#define CO_VPTR_IO_AREA_SIZE (0x10000ul) +#define CO_VPTR_IO_AREA_START (CO_VPTR_BASE - 0x1200000ul) +#define CO_VPTR_SELF_MAP (CO_VPTR_BASE - 0x1400000ul) + +#define CO_VPTR_BASE_START CO_VPTR_SELF_MAP +#define CO_VPTR_BASE_END CO_VPTR_BASE + +/* + * Calculation of maximal Linux memory + * + * 0xffffc000 FIXADDR_START + * 20 MB coLinux shared host + * 0xfe800000 CO_VPTR_BASE_START + * 2*PAGE_SIZE 8 KB free slot + * 0xfe7fe000 VMALLOC_END + * 7 MB vmalloc + * 0xfe000000 VMALLOC_START + * VMALLOC_OFFSET 8 MB free slot + * 0xfd800000 high_memory + * 984 MB lowmem + * 0xc0000000 PAGE_OFFSET + */ + +#define CO_LOWMEMORY_MAX_MB 984 + +typedef struct { + unsigned long kernel_cs; + unsigned long kernel_ds; +} __attribute__((packed)) co_arch_info_t; + +#endif Index: linux-2.6.25-source/include/asm-x86/cooperative_internal.h 
=================================================================== --- /dev/null +++ linux-2.6.25-source/include/asm-x86/cooperative_internal.h @@ -0,0 +1,53 @@ +/* + * linux/include/asm/cooperative_internal.h + * + * Copyright (C) 2004 Dan Aloni + */ + +#ifndef __LINUX_ASM_COOPERATIVE_INTERNAL_H__ +#define __LINUX_ASM_COOPERATIVE_INTERNAL_H__ + +#include + +#ifdef CONFIG_COOPERATIVE + +extern bool co_host_fpu_saved; +extern char co_host_fpu[0x200]; + +extern void co_kernel_breakpoint(struct pt_regs * regs); +extern int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition); + +#define CO_FPU_SAVE(x) \ +do \ +{ \ + if (cpu_has_fxsr) \ + asm("fxsave " #x " ; fnclex"); \ + else \ + asm("fnsave " #x " ; fwait"); \ +} \ +while (0) + +#define CO_FPU_RESTORE(x) \ +do \ +{ \ + if (cpu_has_fxsr) \ + asm("fxrstor " #x); \ + else \ + asm("frstor " #x); \ +} \ +while (0) + +#else + +static inline void co_kernel_breakpoint(struct pt_regs * regs) +{ +} + +static inline int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition) +{ + return 0; +} + +#endif + +#endif Index: linux-2.6.25-source/include/asm-x86/dma.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/dma.h +++ linux-2.6.25-source/include/asm-x86/dma.h @@ -288,6 +288,7 @@ * * Assumes DMA flip-flop is clear. */ +#ifndef CONFIG_COOPERATIVE static __inline__ int get_dma_residue(unsigned int dmanr) { unsigned int io_port; @@ -302,6 +303,7 @@ return (dmanr <= 3) ? 
count : (count << 1); } +#endif /* These are in kernel/dma.c: */ Index: linux-2.6.25-source/include/asm-x86/mach-default/irq_vectors.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/mach-default/irq_vectors.h +++ linux-2.6.25-source/include/asm-x86/mach-default/irq_vectors.h @@ -67,6 +67,17 @@ #define TIMER_IRQ 0 +#ifdef CONFIG_COOPERATIVE +#define KEYBOARD_IRQ 1 +#define SERIAL_IRQ 3 +#define SOUND_IRQ 5 +#define POWER_IRQ 9 +#define NETWORK_IRQ 10 +#define SCSI_IRQ 11 +#define MOUSE_IRQ 12 +#define BLOCKDEV_IRQ 15 +#endif + /* * 16 8259A IRQ's, 208 potential APIC interrupt sources. * Right now the APIC is mostly only used for SMP. Index: linux-2.6.25-source/include/asm-x86/mach-default/irq_vectors_limits.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/mach-default/irq_vectors_limits.h +++ linux-2.6.25-source/include/asm-x86/mach-default/irq_vectors_limits.h @@ -1,7 +1,7 @@ #ifndef _ASM_IRQ_VECTORS_LIMITS_H #define _ASM_IRQ_VECTORS_LIMITS_H -#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) +#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_X86_UP_COPIC) || defined(CONFIG_PARAVIRT) #define NR_IRQS 224 # if (224 >= 32 * NR_CPUS) # define NR_IRQ_VECTORS NR_IRQS Index: linux-2.6.25-source/include/asm-x86/mc146818rtc.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/mc146818rtc.h +++ linux-2.6.25-source/include/asm-x86/mc146818rtc.h @@ -83,6 +83,8 @@ #define current_lock_cmos_reg() 0 #endif +#ifndef CONFIG_COOPERATIVE + /* * The yet supported machines all access the RTC index register via * an ISA port access but the way to access the date register differs ... 
@@ -95,6 +97,11 @@ extern int mach_set_rtc_mmss(unsigned long nowtime); extern unsigned long mach_get_cmos_time(void); +#else +#define CMOS_READ(addr) (0) +#define CMOS_WRITE(val, addr) do {} while(0) +#endif + #define RTC_IRQ 8 #endif /* _ASM_MC146818RTC_H */ Index: linux-2.6.25-source/include/asm-x86/page.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/page.h +++ linux-2.6.25-source/include/asm-x86/page.h @@ -39,6 +39,20 @@ #define max_pfn_mapped max_low_pfn #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_COOPERATIVE +#define CO_PA(pfn) (((unsigned long *)CO_VPTR_PSEUDO_RAM_PAGE_TABLES)[pfn]) +#define CO_VA_PFN(pa) (((unsigned long *)CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP)[((pa) >> PAGE_SHIFT)]) +#define CO_PFN_PP_TO_P(pfn) (CO_PA(pfn) >> PAGE_SHIFT) +#define CO_PFN_P_TO_PP(pfn) (CO_VA_PFN((pfn) << PAGE_SHIFT)) +#define CO_PP_TO_P(pa) ((CO_PFN_PP_TO_P((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#define CO_P_TO_PP(pa) ((CO_PFN_P_TO_PP((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#else +#define CO_PFN_P_TO_PP(pfn) (pfn) +#define CO_PFN_PP_TO_P(pfn) (pfn) +#define CO_PP_TO_P(pa) (pa) +#define CO_P_TO_PP(pa) (pa) +#endif + #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VM_DATA_DEFAULT_FLAGS \ @@ -47,6 +61,7 @@ #ifndef __ASSEMBLY__ +#include extern int page_is_ram(unsigned long pagenr); Index: linux-2.6.25-source/include/asm-x86/pgtable-2level.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/pgtable-2level.h +++ linux-2.6.25-source/include/asm-x86/pgtable-2level.h @@ -1,6 +1,8 @@ #ifndef _I386_PGTABLE_2LEVEL_H #define _I386_PGTABLE_2LEVEL_H +#include + #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ @@ -53,7 +55,7 @@ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) +#define pte_pfn(x) 
CO_PFN_P_TO_PP((pte_val(x) >> PAGE_SHIFT)) /* * Bits 0, 6 and 7 are taken, split up the 29 bits of offset Index: linux-2.6.25-source/include/asm-x86/pgtable.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/pgtable.h +++ linux-2.6.25-source/include/asm-x86/pgtable.h @@ -173,13 +173,13 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | + return __pte((((phys_addr_t)CO_PFN_PP_TO_P(page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | + return __pmd((((phys_addr_t)CO_PFN_PP_TO_P(page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } Index: linux-2.6.25-source/include/asm-x86/processor.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/processor.h +++ linux-2.6.25-source/include/asm-x86/processor.h @@ -201,6 +201,7 @@ } __attribute__((packed)) ____cacheline_aligned; #endif +#ifdef __KERNEL__ /* * Size of io_bitmap. */ @@ -626,6 +627,7 @@ ".byte 0x0f,0x01,0xc8;" : :"a" (eax), "c" (ecx), "d"(edx)); } +#endif /* __KERNEL__ */ static inline void __mwait(unsigned long eax, unsigned long ecx) { Index: linux-2.6.25-source/include/linux/cooperative.h =================================================================== --- /dev/null +++ linux-2.6.25-source/include/linux/cooperative.h @@ -0,0 +1,399 @@ +/* + * linux/include/linux/cooperative.h + * + * Copyright (C) 2004 Dan Aloni + * + * This file defines the interfaces between the Cooperative Linux kernel + * and the host OS driver. It's for both external inclusion from the + * host OS driver and internal inclusion in the kernel sources. 
+ */ + +#ifndef __LINUX_COOPERATIVE_H__ +#define __LINUX_COOPERATIVE_H__ + +#ifdef __KERNEL__ +#ifndef CO_KERNEL +#define CO_COLINUX_KERNEL +#define CO_KERNEL +#endif +#endif + +#include + +#define CO_LINUX_API_VERSION 14 + +#pragma pack(0) + +#define CO_BOOTPARAM_STRING_LENGTH 0x100 + +typedef enum { + CO_OPERATION_EMPTY=0, + CO_OPERATION_START, + CO_OPERATION_IDLE, + CO_OPERATION_TERMINATE, + CO_OPERATION_MESSAGE_TO_MONITOR, + CO_OPERATION_MESSAGE_FROM_MONITOR, + CO_OPERATION_FORWARD_INTERRUPT, + CO_OPERATION_DEVICE, + CO_OPERATION_GET_TIME, + CO_OPERATION_DEBUG_LINE, + CO_OPERATION_GET_HIGH_PREC_TIME, + CO_OPERATION_TRACE_POINT, + CO_OPERATION_FREE_PAGES, + CO_OPERATION_ALLOC_PAGES, + CO_OPERATION_PRINTK_unused, + CO_OPERATION_GETPP, + CO_OPERATION_MAX /* Must be last entry all times */ +} co_operation_t; + +#define CO_MODULE_MAX_CONET 16 +#define CO_MODULE_MAX_COBD 32 +#define CO_MODULE_MAX_COFS 32 +#define CO_MODULE_MAX_SERIAL 32 +#define CO_MODULE_MAX_COSCSI 32 +#define CO_MODULE_MAX_COVIDEO 2 +#define CO_MODULE_MAX_COAUDIO 2 + +typedef enum { + CO_MODULE_LINUX, + CO_MODULE_MONITOR, + CO_MODULE_DAEMON, + CO_MODULE_IDLE, + CO_MODULE_KERNEL_SWITCH, + CO_MODULE_USER_SWITCH, + CO_MODULE_CONSOLE, + CO_MODULE_PRINTK, + + CO_MODULE_CONET0, + CO_MODULE_CONET_END=CO_MODULE_CONET0+CO_MODULE_MAX_CONET-1, + + CO_MODULE_COBD0, + CO_MODULE_COBD_END=CO_MODULE_COBD0+CO_MODULE_MAX_COBD-1, + + CO_MODULE_COFS0, + CO_MODULE_COFS_END=CO_MODULE_COFS0+CO_MODULE_MAX_COFS-1, + + CO_MODULE_SERIAL0, + CO_MODULE_SERIAL_END=CO_MODULE_SERIAL0+CO_MODULE_MAX_SERIAL-1, + + CO_MODULE_COSCSI0, + CO_MODULE_COSCSI_END=CO_MODULE_COSCSI0+CO_MODULE_MAX_COSCSI-1, + + CO_MODULE_COVIDEO0, + CO_MODULE_COVIDEO_END=CO_MODULE_COVIDEO0+CO_MODULE_MAX_COVIDEO-1, + + CO_MODULE_COAUDIO0, + CO_MODULE_COAUDIO_END=CO_MODULE_COAUDIO0+CO_MODULE_MAX_COAUDIO-1, + CO_MODULES_MAX, +} co_module_t; + +typedef enum { + CO_PRIORITY_DISCARDABLE=0, + CO_PRIORITY_IMPORTANT, +} co_priority_t; + +typedef enum { + 
CO_MESSAGE_TYPE_STRING=0, + CO_MESSAGE_TYPE_OTHER=1, +} co_message_type_t; + +typedef struct { + co_module_t from; + co_module_t to; + co_priority_t priority; + co_message_type_t type; + unsigned long size; + unsigned char data[0]; +} __attribute__((packed)) co_message_t; + +typedef enum { + CO_DEVICE_BLOCK=0, + CO_DEVICE_CONSOLE, + CO_DEVICE_KEYBOARD, + CO_DEVICE_NETWORK, + CO_DEVICE_TIMER, + CO_DEVICE_POWER, + CO_DEVICE_SERIAL, + CO_DEVICE_FILESYSTEM, + + CO_DEVICE_MOUSE, + CO_DEVICE_SCSI, + CO_DEVICE_VIDEO, + CO_DEVICE_AUDIO, + CO_DEVICE_PCI, + + CO_DEVICES_TOTAL, +} co_device_t; + +typedef enum { + CO_KBD_SCANCODE_RAW=0, + CO_KBD_SCANCODE_ASCII +} mode_data_from_keyboard_t; + +typedef struct { + unsigned char code; + mode_data_from_keyboard_t mode; +} __attribute__((packed)) co_scan_code_t; + +#define CO_MOUSE_MAX_X 2048 +#define CO_MOUSE_MAX_Y 2048 + +typedef struct { + unsigned btns; + unsigned abs_x; + unsigned abs_y; + int rel_z; +} __attribute__((packed)) co_mouse_data_t; + +typedef enum { + CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL=0, + CO_LINUX_MESSAGE_POWER_SHUTDOWN, + CO_LINUX_MESSAGE_POWER_OFF, +} co_linux_message_power_type_t; + +typedef struct { + co_linux_message_power_type_t type; +} __attribute__((packed)) co_linux_message_power_t; + +typedef struct { + unsigned long tick_count; +} __attribute__((packed)) co_linux_message_idle_t; + +typedef struct { + co_device_t device; + unsigned int unit; + unsigned long size; + char data[]; +} __attribute__((packed)) co_linux_message_t; + +typedef enum { + CO_TERMINATE_END=0, + CO_TERMINATE_REBOOT, + CO_TERMINATE_POWEROFF, + CO_TERMINATE_PANIC, + CO_TERMINATE_HALT, + CO_TERMINATE_FORCED_OFF, + CO_TERMINATE_FORCED_END, + CO_TERMINATE_INVALID_OPERATION, + CO_TERMINATE_STACK_OVERFLOW, + CO_TERMINATE_BUG, + CO_TERMINATE_VMXE, +} co_termination_reason_t; + +#ifdef CO_KERNEL + +#ifndef asmlinkage +#define asmlinkage __attribute__((regparm(0))) +#endif + +typedef void asmlinkage (*co_switcher_t)(co_arch_passage_page_t 
*page, + void *from, + void *to); + +#define co_passage_page_func_low(_from_,_to_) \ + (((co_switcher_t)(co_passage_page->code)) \ + (co_passage_page, \ + &_from_.border2, \ + &_to_.border2)) + +# ifdef CO_COLINUX_KERNEL +# define co_passage_page ((co_arch_passage_page_t *)(CO_VPTR_PASSAGE_PAGE)) +# define co_current (co_passage_page->linuxvm_state) +# define co_other (co_passage_page->host_state) +# else +# define co_passage_page (cmon->passage_page) +# define co_other (co_passage_page->linuxvm_state) +# define co_current (co_passage_page->host_state) +# endif + +# define co_switch() co_passage_page_func_low(co_current, co_other) + +#endif /* CO_KERNEL */ + +/* + * Defines operations on various virtual devices. + */ + +typedef enum { + CO_OPERATION_CONSOLE_STARTUP=0, + CO_OPERATION_CONSOLE_INIT=1, + CO_OPERATION_CONSOLE_DEINIT, + CO_OPERATION_CONSOLE_CLEAR, + CO_OPERATION_CONSOLE_PUTC, + CO_OPERATION_CONSOLE_PUTCS, + CO_OPERATION_CONSOLE_CURSOR_DRAW, + CO_OPERATION_CONSOLE_CURSOR_ERASE, + CO_OPERATION_CONSOLE_CURSOR_MOVE, + CO_OPERATION_CONSOLE_SCROLL_UP, + CO_OPERATION_CONSOLE_SCROLL_DOWN, + CO_OPERATION_CONSOLE_BMOVE, + CO_OPERATION_CONSOLE_SWITCH, + CO_OPERATION_CONSOLE_BLANK, + CO_OPERATION_CONSOLE_FONT_OP, + CO_OPERATION_CONSOLE_SET_PALETTE, + CO_OPERATION_CONSOLE_SCROLLDELTA, + CO_OPERATION_CONSOLE_SET_ORIGIN, + CO_OPERATION_CONSOLE_SAVE_SCREEN, + CO_OPERATION_CONSOLE_INVERT_REGION, + CO_OPERATION_CONSOLE_CONFIG, + CO_OPERATION_CONSOLE_INIT_SCROLLBUFFER, +} co_operation_console_t; + + +typedef char co_console_code; +typedef unsigned short co_console_character; +typedef unsigned short co_console_unit; + +typedef struct { + co_console_unit x; + co_console_unit y; + co_console_unit height; +} __attribute__((packed)) co_cursor_pos_t; + +typedef struct { + co_operation_console_t type; + union { + struct { + co_console_unit top; + co_console_unit bottom; + co_console_unit lines; + co_console_character charattr; + } scroll; + struct { + co_console_unit y; + 
co_console_unit x; + co_console_unit count; + co_console_character data[]; + } putcs; + struct { + co_console_unit x; + co_console_unit y; + co_console_character charattr; + } putc; + struct { + co_console_unit top; + co_console_unit left; + co_console_unit bottom; + co_console_unit right; + co_console_character charattr; + } clear; + struct { + co_console_unit y; + co_console_unit x; + co_console_unit count; + } invert; + struct { + co_console_unit row; + co_console_unit column; + co_console_unit top; + co_console_unit left; + co_console_unit bottom; + co_console_unit right; + } bmove; + struct { + co_console_unit rows; + co_console_unit cols; + co_console_unit attr; + } config; + co_cursor_pos_t cursor; + }; +} __attribute__((packed)) co_console_message_t; + +typedef struct { + unsigned long messages_waiting; + unsigned char buffer[]; +} co_io_buffer_t; + +typedef struct { + unsigned long index; + unsigned long flags; + unsigned long func; + unsigned long pid; +} __attribute__((packed)) co_trace_point_info_t; + +typedef enum { + CO_BLOCK_OPEN=0, + CO_BLOCK_STAT, + CO_BLOCK_READ, + CO_BLOCK_WRITE, + CO_BLOCK_CLOSE, + CO_BLOCK_GET_ALIAS, +} co_block_request_type_t; + +typedef enum { + CO_BLOCK_REQUEST_RETCODE_OK=0, + CO_BLOCK_REQUEST_RETCODE_ERROR=-1, +} co_block_request_retcode_t; + +typedef enum { + CO_NETWORK_GET_MAC=0, +} co_network_request_type_t; + +#ifdef CO_KERNEL +/* If we are compiling kernel code (Linux or Host Driver) */ +# ifdef CO_COLINUX_KERNEL +/* Inside Linux, vm_ptr_t considered a valid pointer in its virtual address space */ +typedef void *vm_ptr_t; +# else +/* But inside the host, the type is considered not to be a pointer in its own address space */ +typedef unsigned long vm_ptr_t; +# endif + +typedef struct { + co_block_request_type_t type; + long rc; + union { + struct { + unsigned long long offset; + unsigned long long size; + unsigned long long disk_size; + vm_ptr_t address; + void * irq_request; + int async; + }; + struct { + char 
alias[20]; + }; + }; +} __attribute__((packed)) co_block_request_t; + +typedef struct { + void * irq_request; + int uptodate; +} __attribute__((packed)) co_block_intr_t; + +typedef struct { + co_network_request_type_t type; + unsigned int unit; + char mac_address[6]; + char _pad[2]; + int result; +} __attribute__((packed)) co_network_request_t; + +#endif /* CO_KERNEL */ + +typedef struct { + unsigned long api_version; + unsigned long compiler_major; + unsigned long compiler_minor; + unsigned long compiler_abi; +} __attribute__((packed)) co_info_t; + +typedef struct { + unsigned long co_core_end; + unsigned long co_memory_size; + void *co_initrd; + unsigned long co_initrd_size; + unsigned long co_cpu_khz; + unsigned long filler[5]; // compatible old api: empty 5,6,7,8,9 + char co_boot_parameters[CO_BOOTPARAM_STRING_LENGTH]; // params[10] +} __attribute__((packed)) co_boot_params_t; + +#ifndef COLINUX_TRACE +#define CO_TRACE_STOP +#define CO_TRACE_CONTINUE +#endif + +#pragma pack() + +#endif Index: linux-2.6.25-source/include/linux/cooperative_internal.h =================================================================== --- /dev/null +++ linux-2.6.25-source/include/linux/cooperative_internal.h @@ -0,0 +1,128 @@ +/* + * linux/include/linux/cooperative_internal.h + * + * Copyright (C) 2004 Dan Aloni + * + * This header gathers the functions and variables in Cooperative Mode + * when CONFIG_COOPERATIVE is defined. 
+ */ +#ifndef __LINUX_COOPERATIVE_LINUX_H__ +#define __LINUX_COOPERATIVE_LINUX_H__ + +#include +#include +#include + +#ifdef CONFIG_COOPERATIVE + +typedef struct { + struct list_head node; + co_message_t msg; +} co_message_node_t; + +extern co_boot_params_t co_boot_params; +extern int co_passage_page_holding_count; + +#ifdef CONFIG_COLINUX_STATS +typedef struct co_proc_counts { + unsigned long switches[CO_OPERATION_MAX]; +} co_proc_counts_t; + +extern co_proc_counts_t co_proc_counts; +#endif + +#define co_io_buffer ((co_io_buffer_t *)CO_VPTR_IO_AREA_START) +#define cooperative_mode_enabled() 1 + +extern void co_debug(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void co_printk(const char *line, int size); + +extern void co_switch_wrapper(void); +extern void co_callback(struct pt_regs *regs); +extern void co_idle_processor(void); +extern void co_terminate(co_termination_reason_t reason); +extern void co_terminate_panic(const char *text, int len); +extern void co_terminate_bug(int code, int line, const char *file); +extern void co_free_pages(unsigned long vaddr, int pages); +extern int co_alloc_pages(unsigned long vaddr, int pages); +extern void co_start_kernel(void); +extern void co_arch_start_kernel(void); + +extern void co_send_message(co_module_t from, + co_module_t to, + co_priority_t priority, + co_message_type_t type, + unsigned long size, + const char *data); +extern unsigned long co_get_host_time(void); + +extern int co_get_message(co_message_node_t **message, co_device_t device); +static inline void co_free_message(co_message_node_t *message) +{ + kfree(message); +} + +extern void *co_map_buffer(void *, int); + +static inline void co_passage_page_acquire(unsigned long *flags) +{ + local_irq_save(*flags); + co_passage_page_holding_count++; +} + +static inline void co_passage_page_ref_down(void) +{ + co_passage_page_holding_count--; +} + +static inline void co_passage_page_ref_up(void) +{ + co_passage_page_holding_count++; +} + 
+static inline int co_passage_page_held(void) +{ + return co_passage_page_holding_count; +} + +static inline void co_passage_page_release(unsigned long flags) +{ + co_passage_page_holding_count--; + local_irq_restore(flags); +} + +#define co_passage_page_assert_valid() do { \ + BUG_ON(co_passage_page_held()); \ +} while (0) + +static inline co_message_t *co_send_message_save(unsigned long *flags) +{ + co_passage_page_assert_valid(); + co_passage_page_acquire(flags); + + if (co_io_buffer->messages_waiting) { + co_passage_page_release(*flags); + return NULL; + } + + co_passage_page->operation = CO_OPERATION_MESSAGE_TO_MONITOR; + co_io_buffer->messages_waiting = 1; + return ((co_message_t *)co_io_buffer->buffer); +} + +static inline void co_send_message_restore(unsigned long flags) +{ + co_switch_wrapper(); + co_passage_page_release(flags); +} + +#else +#define co_printk(line, size) do {} while (0) +#define co_terminate(reason) do {} while (0) +#define co_terminate_panic(text, len) do {} while (0) /* panic() calls this unconditionally */ +#define cooperative_mode_enabled() 0 + +#endif + +#endif Index: linux-2.6.25-source/kernel/Makefile =================================================================== --- linux-2.6.25-source.orig/kernel/Makefile +++ linux-2.6.25-source/kernel/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o endif obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_COOPERATIVE) += cooperative.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o Index: linux-2.6.25-source/kernel/cooperative.c =================================================================== --- /dev/null +++ linux-2.6.25-source/kernel/cooperative.c @@ -0,0 +1,461 @@ +/* + * linux/kernel/cooperative.c + * + * Cooperative mode (coLinux) support routines. + * + * Dan Aloni , 2003-2004 (C). 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +CO_TRACE_STOP; + +co_boot_params_t co_boot_params; + +typedef struct { + struct list_head list; + int num_messages; +} co_message_queue_t; + +int co_messages_active = 0; +co_message_queue_t co_outgoing_messages; +co_message_queue_t co_incoming_messages; +co_message_queue_t *co_incoming_queued_messages; + +#ifdef CONFIG_COLINUX_STATS +co_proc_counts_t co_proc_counts; +#endif + +void __init co_start_kernel(void) +{ + memcpy(&co_boot_params, co_passage_page->params, sizeof(co_boot_params)); + + co_arch_start_kernel(); + + /* should never be reached */ + co_terminate(CO_TERMINATE_END); +} + +void co_send_message(co_module_t from, + co_module_t to, + co_priority_t priority, + co_message_type_t type, + unsigned long size, + const char *data) +{ + unsigned long flags; + co_message_t *buffer; + + BUG_ON((sizeof(co_message_t) + size) > CO_VPTR_IO_AREA_SIZE - sizeof(co_io_buffer_t)); + + buffer = co_send_message_save(&flags); + if (!buffer) + return; + + buffer->from = from; + buffer->to = to; + buffer->priority = priority; + buffer->type = type; + buffer->size = size; + memcpy(buffer->data, data, size); + + co_send_message_restore(flags); +} + +static void co_message_add_to_incoming(co_message_t *message, unsigned long size) +{ + co_message_node_t *message_copy; + + message_copy = kmalloc(size + sizeof(co_message_node_t) - sizeof(co_message_t), + GFP_ATOMIC); + if (!message_copy) + return; + + memcpy(&message_copy->msg, message, size); + list_add_tail(&message_copy->node, &co_incoming_messages.list); +} + +static void co_handle_jiffies(long count) +{ + if (count > HZ) { + long secs = count / HZ; + + /* 'warp_clock' for long distances */ + write_seqlock(&xtime_lock); + xtime.tv_sec += secs; + count -= (secs * HZ); + update_xtime_cache(0); + write_sequnlock(&xtime_lock); + clock_was_set(); + } + + while (count > 0) { + irq_enter(); + __do_IRQ(TIMER_IRQ); + irq_exit(); + count--; + } 
+} + +/* called with disabled interrupts */ +static void co_handle_incoming_message(co_message_node_t *node_message) +{ + co_linux_message_t *message; + co_message_queue_t *queue; + int irq; + + message = (co_linux_message_t *)&node_message->msg.data; + switch (message->device) { + case CO_DEVICE_POWER: irq = POWER_IRQ; break; + case CO_DEVICE_KEYBOARD: irq = KEYBOARD_IRQ; break; +#ifdef CONFIG_CONET_COOPERATIVE + case CO_DEVICE_NETWORK: irq = NETWORK_IRQ; break; +#endif +#ifdef CONFIG_SERIAL_COOPERATIVE + case CO_DEVICE_SERIAL: irq = SERIAL_IRQ; break; +#endif + case CO_DEVICE_SCSI: irq = SCSI_IRQ; break; + case CO_DEVICE_MOUSE: irq = MOUSE_IRQ; break; + case CO_DEVICE_BLOCK: irq = BLOCKDEV_IRQ; break; + default: + BUG_ON((unsigned long)message->device >= (unsigned long)CO_DEVICES_TOTAL); + co_free_message(node_message); + return; + } + + /* Add to the queue */ + queue = &co_incoming_queued_messages[message->device]; + list_add(&node_message->node, &queue->list); + queue->num_messages++; + + irq_enter(); + __do_IRQ(irq); + irq_exit(); +} + +static void co_handle_incoming_messages(void) +{ + if (!co_messages_active) + return; + + /* + * Pop a message from the incoming queue. + */ + while (!list_empty(&co_incoming_messages.list)) { + co_message_node_t *message; + + message = list_entry(co_incoming_messages.list.next, + co_message_node_t, node); + BUG_ON((unsigned long)message->msg.from >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)message->msg.to >= (unsigned long)CO_MODULES_MAX); + list_del(&message->node); + + /* + * Let the interrupt routine of the arch dependant code + * handle the message, and be responsible to free it. 
+ */ + co_handle_incoming_message(message); + } +} + +void co_callback(struct pt_regs *regs) +{ + long io_size; + unsigned long new_jiffies; + struct pt_regs null_regs, *old_regs; + + BUG_ON(!irqs_disabled()); + if (co_passage_page->operation != CO_OPERATION_MESSAGE_FROM_MONITOR) { + co_passage_page_ref_down(); + return; + } + +#ifdef CONFIG_COLINUX_STATS + co_proc_counts.switches[CO_OPERATION_MESSAGE_FROM_MONITOR]++; +#endif + io_size = co_passage_page->params[0]; + new_jiffies = co_passage_page->params[1]; + + if (co_messages_active && io_size > 0 && io_size <= CO_VPTR_IO_AREA_SIZE - sizeof(co_io_buffer_t)) { + static unsigned char temp_storage[CO_VPTR_IO_AREA_SIZE]; + unsigned char *io_buffer = temp_storage; + unsigned char *io_buffer_end = &temp_storage[io_size]; + + /* Copy into temp storage first, because kmalloc may call into the host to map pages */ + memcpy(temp_storage, co_io_buffer->buffer, io_size); + co_io_buffer->messages_waiting = 0; + co_passage_page_ref_down(); + + while (io_buffer < io_buffer_end) { + co_message_t *message = (co_message_t *)io_buffer; + co_linux_message_t *linux_message = (co_linux_message_t *)message->data; + unsigned long size = message->size + sizeof(*message); + + BUG_ON((unsigned long)message->from >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)message->to >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)linux_message->device >= (unsigned long)CO_DEVICES_TOTAL); + + co_message_add_to_incoming(message, size); + io_buffer += size; + } + } else { + co_io_buffer->messages_waiting = 0; + co_passage_page_ref_down(); + } + + memset (&null_regs, 0, sizeof(null_regs)); + + /* regs is only passed in from proxy_interrupt_handler(); its cs is needed for user_mode() */ + if (regs) + null_regs.cs = regs->cs; + old_regs = set_irq_regs(&null_regs); + co_handle_jiffies(new_jiffies); + co_handle_incoming_messages(); + set_irq_regs(old_regs); /* null_regs is on our stack: do not leave irq_regs pointing at it */ +} + +void co_idle_processor(void) +{ + co_passage_page_assert_valid(); + local_irq_disable(); + co_passage_page_ref_up(); + co_passage_page->operation = CO_OPERATION_IDLE; + 
co_switch_wrapper(); + co_callback(NULL); + local_irq_enable(); +} + +void co_printk(const char *line, int size) +{ + unsigned long flags; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + if (co_message) { + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_PRINTK; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + if (size > 200) + size = 200; + co_message->size = size + 1; + memcpy(co_message->data, line, size); + co_message->data[size] = '\0'; + co_send_message_restore(flags); + } +} + +NORET_TYPE void co_terminate_panic(const char *text, int len) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = CO_TERMINATE_PANIC; + co_passage_page->params[1] = 0; + co_passage_page->params[2] = 0; + co_passage_page->params[3] = len; + memcpy((char *)&co_passage_page->params[4], text, len+1); + co_switch_wrapper(); + /* This never returns. */ +} + +NORET_TYPE void co_terminate_bug(int code, int line, const char *file) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = CO_TERMINATE_BUG; + co_passage_page->params[1] = code; + co_passage_page->params[2] = line; + co_passage_page->params[3] = strlen(file); + strcpy((char *)&co_passage_page->params[4], file); + co_switch_wrapper(); + /* This never returns. */ +} +EXPORT_SYMBOL(co_terminate_bug); + +NORET_TYPE void co_terminate(co_termination_reason_t reason) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = reason; + co_passage_page->params[3] = 0; /* len */ + co_switch_wrapper(); + /* This never returns. 
*/ +} +EXPORT_SYMBOL(co_terminate); + +unsigned long co_get_host_time(void) +{ + unsigned long flags; + unsigned long time; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_GET_TIME; + co_switch_wrapper(); + time = co_passage_page->params[0]; + co_passage_page_release(flags); + + return time; +} + +int co_get_message(co_message_node_t **message, co_device_t device) +{ + co_message_queue_t *queue; + co_message_node_t *node; + unsigned long flags; + + if (!co_messages_active) + return 0; + + local_irq_save(flags); + queue = &co_incoming_queued_messages[device]; + if (list_empty(&queue->list)) { + local_irq_restore(flags); + return 0; + } + + node = list_entry(queue->list.prev, co_message_node_t, node); + list_del(&node->node); + queue->num_messages--; + local_irq_restore(flags); + + *message = node; + return 1; +} + +co_info_t co_info = { + .api_version = CO_LINUX_API_VERSION, + .compiler_major = __GNUC__, + .compiler_minor = __GNUC_MINOR__, + .compiler_abi = __GXX_ABI_VERSION, +}; + +static int __init initcall_message_queues(void) +{ + int queue_index; + + INIT_LIST_HEAD(&co_outgoing_messages.list); + INIT_LIST_HEAD(&co_incoming_messages.list); + + co_incoming_queued_messages = + kmalloc(sizeof(co_message_queue_t) * CO_DEVICES_TOTAL, GFP_KERNEL); + if (!co_incoming_queued_messages) + panic("unable to allocate message queues\n"); + + for (queue_index=0; queue_index < CO_DEVICES_TOTAL; queue_index++) { + co_message_queue_t *queue = &co_incoming_queued_messages[queue_index]; + queue->num_messages = 0; + INIT_LIST_HEAD(&queue->list); + } + + co_messages_active = 1; + + return 0; +} + + +void co_free_pages(unsigned long vaddr, int pages) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_FREE_PAGES; + co_passage_page->params[0] = vaddr; + co_passage_page->params[1] = pages; + co_switch_wrapper(); + co_passage_page_release(flags); +} + +int 
co_alloc_pages(unsigned long vaddr, int size) +{ + unsigned long flags; + long result; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_ALLOC_PAGES; + co_passage_page->params[0] = vaddr; + co_passage_page->params[1] = size; + co_switch_wrapper(); + result = (long)co_passage_page->params[4]; + co_passage_page_release(flags); + + if (result < 0) + return -ENOMEM; + + return 0; +} + +__initcall(initcall_message_queues); + +#ifdef CONFIG_COLINUX_STATS +static int co_guest_read_status(char *page, char **start, off_t off, int count, int *eof, void *clear) +{ + co_proc_counts_t hold; + int len; + + local_irq_disable(); + hold = co_proc_counts; + if (clear) + memset(&co_proc_counts, 0, sizeof(co_proc_counts)); + local_irq_enable(); + + len = sprintf(page, "idle:\t%lu\n" + "msgto:\t%lu\n" + "mfrom:\t%lu\n" + "intr:\t%lu\n" + "dev:\t%lu\n" + "time:\t%lu\n" + "hpt:\t%lu\n" + "free:\t%lu\n" + "alloc:\t%lu\n" + "getpp:\t%lu\n", + hold.switches[CO_OPERATION_IDLE], + hold.switches[CO_OPERATION_MESSAGE_TO_MONITOR], + hold.switches[CO_OPERATION_MESSAGE_FROM_MONITOR], + hold.switches[CO_OPERATION_FORWARD_INTERRUPT], + hold.switches[CO_OPERATION_DEVICE], + hold.switches[CO_OPERATION_GET_TIME], + hold.switches[CO_OPERATION_GET_HIGH_PREC_TIME], + hold.switches[CO_OPERATION_FREE_PAGES], + hold.switches[CO_OPERATION_ALLOC_PAGES], + hold.switches[CO_OPERATION_GETPP]); + + return len; +} + +static __init int co_create_proc_stats(void) +{ + struct proc_dir_entry *co_guest_dir; + + co_guest_dir = proc_mkdir("colinux", NULL); + if(co_guest_dir) { + create_proc_read_entry("stats", + 0444, co_guest_dir, + co_guest_read_status, NULL); + create_proc_read_entry("stats_clear", + 0444, co_guest_dir, + co_guest_read_status, (void*)1); + } + + return 0; +} + +__initcall(co_create_proc_stats); +#endif /* CONFIG_COLINUX_STATS */ + +CO_TRACE_CONTINUE; Index: linux-2.6.25-source/kernel/panic.c =================================================================== --- 
linux-2.6.25-source.orig/kernel/panic.c +++ linux-2.6.25-source/kernel/panic.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -77,11 +78,16 @@ bust_spinlocks(1); va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); + /* vscnprintf() returns the length actually stored, so i stays within buf */ + i = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); bust_spinlocks(0); + if (cooperative_mode_enabled()) { + co_terminate_panic(buf, i); + } + /* * If we have crashed and we have a crash kernel loaded let it handle * everything else. Index: linux-2.6.25-source/kernel/printk.c =================================================================== --- linux-2.6.25-source.orig/kernel/printk.c +++ linux-2.6.25-source/kernel/printk.c @@ -42,6 +42,8 @@ { } +#include + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) /* printk's without a loglevel use this.. */ @@ -715,6 +717,8 @@ printed_len += vscnprintf(printk_buf + printed_len, sizeof(printk_buf) - printed_len, fmt, args); + co_printk(printk_buf, printed_len); + /* * Copy the output into log_buf.
If the caller didn't provide * appropriate log level tags, we insert them here Index: linux-2.6.25-source/mm/bootmem.c =================================================================== --- linux-2.6.25-source.orig/mm/bootmem.c +++ linux-2.6.25-source/mm/bootmem.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,24 @@ for (i = start; i < start + areasize; i++) if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); + + if (cooperative_mode_enabled()) { + unsigned long alloc_address = (unsigned long)ret; + unsigned long alloc_size = size; + int result; + + alloc_size += (alloc_address & (~PAGE_MASK)); + alloc_address &= PAGE_MASK; + alloc_size = (alloc_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + result = co_alloc_pages(alloc_address, alloc_size); + if (result) { + /* free_bootmem() takes a physical address, ret is virtual */ + free_bootmem(virt_to_phys(ret), size); + return NULL; + } + } + memset(ret, 0, size); return ret; } Index: linux-2.6.25-source/mm/page_alloc.c =================================================================== --- linux-2.6.25-source.orig/mm/page_alloc.c +++ linux-2.6.25-source/mm/page_alloc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -309,10 +310,44 @@ } } +static int co_persistent_alloc_pages(unsigned long address, int size) +{ + int result, retries_left; + + for (retries_left = 10; retries_left > 0; retries_left--) { + result = co_alloc_pages(address, size); + if (result) { + unsigned long cache_size; + /* + * Whoops, we have allocated too much of the + * host OS's memory, time to free some + * cache.
+ */ + cache_size = global_page_state(NR_FILE_PAGES)-total_swapcache_pages; + cache_size /= 2; + if (cache_size < size*2) + cache_size = size*2; + shrink_all_memory(cache_size); + } else { + return 0; + } + } + + return result; + +} + static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; + if (cooperative_mode_enabled() && + !TestSetPageCoHostMapped(page)) { + int result; + result = co_persistent_alloc_pages((unsigned long)page_address(page), 1 << order); + BUG_ON(result != 0); + } + /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. @@ -425,6 +460,11 @@ int order_size = 1 << order; int migratetype = get_pageblock_migratetype(page); + if (cooperative_mode_enabled()) { + co_free_pages((unsigned long)page_address(page), order_size); + ClearPageCoHostMapped(page); + } + if (unlikely(PageCompound(page))) destroy_compound_page(page, order); @@ -1645,7 +1685,16 @@ dump_stack(); show_mem(); } + return NULL; got_pg: + if (cooperative_mode_enabled() && + !TestSetPageCoHostMapped(page)) { + int result = co_persistent_alloc_pages((unsigned long)page_address(page), 1 << order); + if (result) { + __free_pages(page, order); + return NULL; + } + } return page; } Index: linux-2.6.25-source/mm/vmscan.c =================================================================== --- linux-2.6.25-source.orig/mm/vmscan.c +++ linux-2.6.25-source/mm/vmscan.c @@ -1722,7 +1722,7 @@ wake_up_interruptible(&pgdat->kswapd_wait); } -#ifdef CONFIG_PM +#if defined(CONFIG_PM) || defined(CONFIG_COOPERATIVE) /* * Helper function for shrink_all_memory(). 
Tries to reclaim 'nr_pages' pages * from LRU lists system-wide, for given pass and priority, and returns the Index: linux-2.6.25-source/drivers/char/mem.c =================================================================== --- linux-2.6.25-source.orig/drivers/char/mem.c +++ linux-2.6.25-source/drivers/char/mem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -119,6 +120,9 @@ ssize_t read, sz; char *ptr; + if (cooperative_mode_enabled()) + return -ENOMEM; + if (!valid_phys_addr_range(p, count)) return -EFAULT; read = 0; @@ -177,6 +181,9 @@ unsigned long copied; void *ptr; + if (cooperative_mode_enabled()) + return -ENOMEM; + if (!valid_phys_addr_range(p, count)) return -EFAULT; @@ -275,6 +282,9 @@ { size_t size = vma->vm_end - vma->vm_start; + if (cooperative_mode_enabled()) + return -EFAULT; + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; @@ -299,6 +309,9 @@ { unsigned long pfn; + if (cooperative_mode_enabled()) + return -EFAULT; + /* Turn a kernel-virtual address into a physical page frame */ pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; @@ -363,6 +376,9 @@ ssize_t low_count, read, sz; char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ + if (cooperative_mode_enabled()) + return -ENOMEM; + read = 0; if (p < (unsigned long) high_memory) { low_count = count; @@ -512,6 +528,9 @@ ssize_t written; char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ + if (cooperative_mode_enabled()) + return -ENOMEM; + if (p < (unsigned long) high_memory) { wrote = count; Index: linux-2.6.25-source/arch/x86/kernel/cpu/bugs.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/cpu/bugs.c +++ linux-2.6.25-source/arch/x86/kernel/cpu/bugs.c @@ -25,6 +25,8 @@ __setup("no-hlt", no_halt); +// Ignore, if have no emulation compiled in +#ifdef CONFIG_MATH_EMULATION static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; @@ 
-33,6 +35,7 @@ } __setup("no387", no_387); +#endif static double __initdata x = 4195835.0; static double __initdata y = 3145727.0; Index: linux-2.6.25-source/arch/x86/kernel/reboot_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.25-source/arch/x86/kernel/reboot_cooperative.c @@ -0,0 +1,135 @@ +/* + * linux/arch/x86/kernel/reboot_cooperative.c + */ + +#include +#include +#include +#include +#include +#include + +/* sys_reboot needs this dummy. */ +void (*pm_power_off)(void); + +/* + * This is a hack to make ctrl_alt_del work as a poweroff, so the OS can be + * notified, unmount and sync everything. + */ +static int co_powering_off; + +static int co_powerd(void *__unused) +{ + extern char * envp_init[]; + static char *const argv_shutdown[]= { "/sbin/shutdown", "-h", "now", NULL }; + static char *const argv_halt[]= { "/sbin/halt", NULL }; + + if (kernel_execve(argv_shutdown[0], argv_shutdown, envp_init) >= 0) + return 0; + printk(KERN_INFO "%s spawn failed\n", argv_shutdown[0]); + + if (kernel_execve(argv_halt[0], argv_halt, envp_init) >= 0) + return 0; + printk(KERN_INFO "%s spawn failed\n", argv_halt[0]); + + printk(KERN_INFO "Fallback into reboot and power off\n"); + co_powering_off++; + ctrl_alt_del(); + + return 0; +} + +static void deferred_shutdown(struct work_struct *dummy) +{ + kthread_run(co_powerd, NULL, "copowerd"); +} + +/* + * This function is called from interrupt context.
+ */ +static void co_schedule_shutdown(void) +{ + static DECLARE_WORK(shutdown_work, deferred_shutdown); + + schedule_work(&shutdown_work); +} + + +static void co_machine_restart(void) +{ + co_terminate(CO_TERMINATE_REBOOT); +} + +void machine_power_off(void) +{ + co_terminate(CO_TERMINATE_POWEROFF); +} + +void machine_shutdown(void) +{ + co_terminate(CO_TERMINATE_HALT); +} + +void machine_emergency_restart(void) +{ + co_machine_restart(); +} + +/* Power off instead of rebooting once co_powerd has set co_powering_off. */ +void machine_restart(char *cmd) +{ + if (co_powering_off) + machine_power_off(); + else + co_machine_restart(); +} + +void machine_halt(void) +{ + co_terminate(CO_TERMINATE_HALT); +} + +/* Handle one pending CO_DEVICE_POWER message, if there is one. */ +static irqreturn_t power_interrupt(int irq, void *dev_id) +{ + co_message_node_t *node_message; + + if (co_get_message(&node_message, CO_DEVICE_POWER)) { + co_linux_message_t *message; + co_linux_message_power_t *type; + + message = (co_linux_message_t *)&node_message->msg.data; + type = (co_linux_message_power_t *)message->data; + switch (type->type) { + case CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL: + ctrl_alt_del(); + break; + case CO_LINUX_MESSAGE_POWER_SHUTDOWN: + co_schedule_shutdown(); + break; + case CO_LINUX_MESSAGE_POWER_OFF: + machine_power_off(); + break; + default: + printk(KERN_ERR "power interrupt: buggy type %d\n", type->type); + } + co_free_message(node_message); + } + + return IRQ_HANDLED; +} + +static int __init co_power_init(void) +{ + int rc; + + rc = request_irq(POWER_IRQ, &power_interrupt, IRQF_SAMPLE_RANDOM, "power", NULL); + if (rc) { + printk(KERN_ERR "POWER: unable to get irq %d\n", POWER_IRQ); + return rc; + } + + return 0; +} + +__initcall(co_power_init); Index: linux-2.6.25-source/arch/x86/mach-default/setup.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/mach-default/setup.c +++ linux-2.6.25-source/arch/x86/mach-default/setup.c @@ -142,6 +142,7 @@ late_initcall(print_ipi_mode); +#ifndef CONFIG_COOPERATIVE /** * machine_specific_memory_setup - Hook for machine
specific memory setup. * @@ -184,3 +185,4 @@ } return who; } +#endif /* !CONFIG_COOPERATIVE */ Index: linux-2.6.25-source/arch/x86/kernel/i8259_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.25-source/arch/x86/kernel/i8259_cooperative.c @@ -0,0 +1,101 @@ +/* + * linux/arch/x86/kernel/i8259_cooperative.c + */ + +#include +#include +#include + +#include +#include +#include + +CO_TRACE_STOP; + +/* Function must have the same prototype as do_IRQ() */ +void proxy_interrupt_handler(struct pt_regs *regs) +{ + co_passage_page_assert_valid(); + co_passage_page_ref_up(); + co_passage_page->operation = CO_OPERATION_FORWARD_INTERRUPT; + co_passage_page->params[0] = (~regs->orig_ax) + 0x20; /* IRQ number */ + co_passage_page->host_state.flags &= ~(1 << 9); /* Turn IF off */ + co_switch_wrapper(); + co_callback(regs); +} + +CO_TRACE_CONTINUE; + +/* + * Not like you have any other choice other than using + * COPIC in Cooperative mode. + */ + +static void end_COPIC_irq(unsigned int irq) +{ +} + +static void mask_and_ack_COPIC(unsigned int irq) +{ +} + +static unsigned int startup_COPIC_irq(unsigned int irq) +{ + return 0; +} + +#define shutdown_COPIC_irq disable_COPIC_irq + +static void disable_COPIC_irq(unsigned int irq) +{ +} + +static void enable_COPIC_irq(unsigned int irq) +{ +} + +static struct irq_chip co_pic_irq_chip = { + .name = "COPIC", + .startup = startup_COPIC_irq, + .shutdown = shutdown_COPIC_irq, + .enable = enable_COPIC_irq, + .disable = disable_COPIC_irq, + .mask_ack = mask_and_ack_COPIC, + .end = end_COPIC_irq, + .set_affinity = NULL +}; + +/* Called from pre_intr_init_hook */ +void __init init_ISA_irqs (void) +{ + int i; + + printk("Setting proxy interrupt vectors\n"); + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = 0; + irq_desc[i].depth = 1; + + set_irq_chip_and_handler_name(i, &co_pic_irq_chip, handle_simple_irq, "CO"); + } + +} + +void __init 
init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + pre_intr_init_hook(); + + /* + * Cover the whole vector space, no vector can escape us. + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } +} Index: linux-2.6.25-source/arch/x86/kernel/ioport_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.25-source/arch/x86/kernel/ioport_cooperative.c @@ -0,0 +1,15 @@ +/* + * linux/arch/x86/kernel/ioport_cooperative.c + */ + +#include + +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + return -EPERM; +} + +asmlinkage long sys_iopl(unsigned long unused) +{ + return -EPERM; +} Index: linux-2.6.25-source/lib/iomap.c =================================================================== --- linux-2.6.25-source.orig/lib/iomap.c +++ linux-2.6.25-source/lib/iomap.c @@ -59,6 +59,7 @@ bad_io_access(port, #is_pio ); \ } while (0) +#ifndef CONFIG_COOPERATIVE #ifndef pio_read16be #define pio_read16be(port) swab16(inw(port)) #define pio_read32be(port) swab32(inl(port)) @@ -224,6 +225,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); +#endif /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) Index: linux-2.6.25-source/drivers/char/Kconfig =================================================================== --- linux-2.6.25-source.orig/drivers/char/Kconfig +++ linux-2.6.25-source/drivers/char/Kconfig @@ -1044,6 +1044,7 @@ config DEVPORT bool depends on !M68K + depends on !COOPERATIVE depends on ISA || PCI default y Index: linux-2.6.25-source/drivers/crypto/Kconfig =================================================================== --- linux-2.6.25-source.orig/drivers/crypto/Kconfig +++ 
linux-2.6.25-source/drivers/crypto/Kconfig @@ -51,6 +51,7 @@ config CRYPTO_DEV_GEODE tristate "Support for the Geode LX AES engine" depends on X86_32 && PCI + depends on !COOPERATIVE select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help Index: linux-2.6.25-source/drivers/usb/Kconfig =================================================================== --- linux-2.6.25-source.orig/drivers/usb/Kconfig +++ linux-2.6.25-source/drivers/usb/Kconfig @@ -17,6 +17,7 @@ # NOTE: SL-811 option should be board-specific ... config USB_ARCH_HAS_HCD boolean + depends on !COOPERATIVE default y if USB_ARCH_HAS_OHCI default y if USB_ARCH_HAS_EHCI default y if PCMCIA && !M32R # sl811_cs @@ -27,6 +28,7 @@ # many non-PCI SOC chips embed OHCI config USB_ARCH_HAS_OHCI boolean + depends on !COOPERATIVE # ARM: default y if SA1111 default y if ARCH_OMAP @@ -52,6 +54,7 @@ # some non-PCI hcds implement EHCI config USB_ARCH_HAS_EHCI boolean + depends on !COOPERATIVE default y if PPC_83xx default y if SOC_AU1200 default y if ARCH_IXP4XX Index: linux-2.6.25-source/arch/x86/mm/comap.c =================================================================== --- /dev/null +++ linux-2.6.25-source/arch/x86/mm/comap.c @@ -0,0 +1,151 @@ + +/* + * This source code is a part of coLinux source package. + * + * Copyright (C) 2008 Steve Shoecraft + * + * The code is licensed under the GPL. See the COPYING file in + * the root directory.
+ * + */ + +#include +#include +#include +#include + +#include +#include + +/* + * Ask the host for the physical pages behind host_buffer. + * Returns a kmalloc()ed array sized for one entry per page (size is + * rounded up to whole pages), or 0 on failure. The caller must kfree() it. + */ +static unsigned long *get_pp(void *host_buffer, int size) { + unsigned long *pp, flags; + int npages,pp_size,rc; + + /* Get # of pages */ + npages = size >> PAGE_SHIFT; + if ((npages * PAGE_SIZE) < size) npages++; + + /* Alloc mem for phys pages */ + pp_size = npages * sizeof(unsigned long); + pp = kmalloc(pp_size, GFP_KERNEL); + if (!pp) { + printk(KERN_ERR "co_map_buffer: error allocating memory for physical pages!\n"); + return 0; + } + + /* Request physical pages from the host */ + co_passage_page_assert_valid(); + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_GETPP; + co_passage_page->params[0] = (unsigned long) pp; + co_passage_page->params[1] = (unsigned long) host_buffer; + co_passage_page->params[2] = size; + co_switch_wrapper(); + rc = co_passage_page->params[0]; + co_passage_page_release(flags); + if (rc) { + printk(KERN_ERR "co_map_buffer: error getting physical pages from host!\n"); + kfree(pp); + return 0; + } + + return pp; +} + +#define VM_OPTS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_PFNMAP | VM_RAND_READ) + +/* + * Map size bytes of host_buffer into a kernel virtual area obtained from + * get_vm_area() by filling the init_mm page tables directly. + * Returns the mapped address, or 0 on failure. + */ +void *co_map_buffer(void *host_buffer, int size) { + unsigned long addr, *pp, *pp_list, *pd, *pt, pa; + struct vm_struct *area; + void *buffer; + int i,npages,pd_index,pt_index; + + /* Get the physical pages for the host buffer */ + pp = get_pp(host_buffer, size); + if (!pp) return 0; + pp_list = pp; /* pp is advanced below; keep the base for kfree() */ + + /* Get an area of virtual memory */ + area = get_vm_area(size, VM_OPTS); + if (!area) { + printk(KERN_ERR "co_map_buffer: unable to get a virtual memory area!\n"); + kfree(pp_list); + return 0; + } + buffer = area->addr; + + /* Round up like get_pp() so a trailing partial page gets mapped too */ + npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + addr = (unsigned long) buffer; + pd = (unsigned long *) init_mm.pgd; + while(npages) { + /* Get pt */ + pd_index = pgd_index(addr); + if (pd[pd_index] != 0) { + pa = pd[pd_index] & PAGE_MASK; + pt = __va(CO_P_TO_PP(pa)); + } else { + pt = (unsigned long *) kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pt) { + printk(KERN_ERR "co_map_buffer: unable to alloc new pt entry!\n"); + kfree(pp_list); + return 0; + } + memset(pt, 0, PAGE_SIZE); + pa =
CO_PP_TO_P(__pa(pt)) | _PAGE_TABLE; + pd[pd_index] = pa; + } + + /* Fill pt */ + pt_index = pte_index(addr); + for(i=pt_index; i < 1024; i++) { + if (!npages) break; + BUG_ON(pt[i] != 0); + pt[i] = *pp | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED; + pp++; + addr += PAGE_SIZE; + npages--; + } + } + + /* The page list is only needed while filling the page tables */ + kfree(pp_list); + return buffer; +} + +#if 0 +void dump_pt(void *buf, int size) { + unsigned long addr, *pd, *pt; + int x, pd_index, pt_index, npages; + + npages = size >> PAGE_SHIFT; + + addr = (unsigned long) buf; + pd = (unsigned long *) __va(read_cr3()); + printk(KERN_INFO "pd: %p\n", pd); + while(npages) { + pd_index = pgd_index(addr); + printk(KERN_INFO "pd[%04d]: %lx\n", pd_index, pd[pd_index]); + BUG_ON(pd[pd_index] == 0); + pt = __va(CO_P_TO_PP(pd[pd_index] & PAGE_MASK)); + pt_index = pte_index(addr); + for(x=pt_index; x < 1024; x++) { + if (!npages) break; + printk(KERN_INFO "%04d: pt[%04d]: %08lx\n", npages, x, pt[x]); + addr += PAGE_SIZE; + npages--; + } + } +} +#endif Index: linux-2.6.25-source/include/linux/page-flags.h =================================================================== --- linux-2.6.25-source.orig/include/linux/page-flags.h +++ linux-2.6.25-source/include/linux/page-flags.h @@ -93,6 +93,8 @@ /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ #define PG_readahead PG_reclaim /* Reminder to do async read-ahead */ +#define PG_co_host_mapped 20 /* Page is mapped on colinux host */ + /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ @@ -296,6 +298,9 @@ #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) #define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags) +#define ClearPageCoHostMapped(page) clear_bit(PG_co_host_mapped, &(page)->flags) +#define TestSetPageCoHostMapped(page) test_and_set_bit(PG_co_host_mapped, &(page)->flags) + struct page; /*
forward declaration */ extern void cancel_dirty_page(struct page *page, unsigned int account_size); Index: linux-2.6.25-source/arch/x86/kernel/vmlinux_32.lds.S =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/vmlinux_32.lds.S +++ linux-2.6.25-source/arch/x86/kernel/vmlinux_32.lds.S @@ -37,6 +37,7 @@ } :text = 0x9090 /* read-only */ + _kernel_start = .; /* colinux kernel entry */ .text : AT(ADDR(.text) - LOAD_OFFSET) { . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ *(.text.page_aligned) Index: linux-2.6.25-source/arch/x86/mm/Makefile_32 =================================================================== --- linux-2.6.25-source.orig/arch/x86/mm/Makefile_32 +++ linux-2.6.25-source/arch/x86/mm/Makefile_32 @@ -7,3 +7,4 @@ obj-$(CONFIG_NUMA) += discontig_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_COOPERATIVE) += comap.o Index: linux-2.6.25-source/include/asm-x86/fixmap_32.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/fixmap_32.h +++ linux-2.6.25-source/include/asm-x86/fixmap_32.h @@ -32,6 +32,7 @@ #include #include #endif +#include /* * Here we define all the compile-time 'special' virtual Index: linux-2.6.25-source/include/asm-x86/io_32.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/io_32.h +++ linux-2.6.25-source/include/asm-x86/io_32.h @@ -122,6 +122,9 @@ */ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap %ld:%ld\n", (long)offset, size); +#endif return ioremap_nocache(offset, size); } @@ -255,10 +258,18 @@ #endif /* __KERNEL__ */ +#ifdef CONFIG_COOPERATIVE +static inline void native_io_delay(void) +{ + asm volatile("jmp 1f; 1: jmp 1f; 1:" : : : "memory"); +} +static inline void io_delay_init(void) {} +#else extern 
void native_io_delay(void); extern int io_delay_type; extern void io_delay_init(void); +#endif #if defined(CONFIG_PARAVIRT) #include Index: linux-2.6.25-source/include/asm-x86/mmzone_32.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/mmzone_32.h +++ linux-2.6.25-source/include/asm-x86/mmzone_32.h @@ -7,6 +7,7 @@ #define _ASM_MMZONE_H_ #include +#include #ifdef CONFIG_NUMA extern struct pglist_data *node_data[]; Index: linux-2.6.25-source/include/asm-x86/pgalloc_32.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/pgalloc_32.h +++ linux-2.6.25-source/include/asm-x86/pgalloc_32.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -21,7 +22,7 @@ pmd_t *pmd, pte_t *pte) { paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(pte)) | _PAGE_TABLE)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) @@ -29,7 +30,7 @@ unsigned long pfn = page_to_pfn(pte); paravirt_alloc_pt(mm, pfn); - set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(((pteval_t)CO_PFN_PP_TO_P(pfn) << PAGE_SHIFT) | _PAGE_TABLE)); } #define pmd_pgtable(pmd) pmd_page(pmd) @@ -88,7 +89,7 @@ * TLB via cr3 if the top-level pgd is changed... 
*/ if (mm == current->active_mm) - write_cr3(read_cr3()); + __flush_tlb(); } #endif /* CONFIG_X86_PAE */ Index: linux-2.6.25-source/include/asm-x86/pgtable_32.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/pgtable_32.h +++ linux-2.6.25-source/include/asm-x86/pgtable_32.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,8 @@ #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#elif defined(CONFIG_COOPERATIVE) +# define VMALLOC_END (CO_VPTR_BASE_START-2*PAGE_SIZE) #else # define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) #endif @@ -173,10 +176,10 @@ #define pte_offset_kernel(dir, address) \ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page(pmd) (pfn_to_page(CO_P_TO_PP(pmd_val(pmd)) >> PAGE_SHIFT)) #define pmd_page_vaddr(pmd) \ - ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + ((unsigned long) __va(CO_P_TO_PP(pmd_val(pmd)) & PAGE_MASK)) #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ Index: linux-2.6.25-source/kernel/sysctl.c =================================================================== --- linux-2.6.25-source.orig/kernel/sysctl.c +++ linux-2.6.25-source/kernel/sysctl.c @@ -693,6 +693,7 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, +#if !defined(CONFIG_COOPERATIVE) { .ctl_name = CTL_UNNUMBERED, .procname = "io_delay_type", @@ -701,6 +702,7 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif /* !CONFIG_COOPERATIVE */ #endif #if defined(CONFIG_MMU) { Index: linux-2.6.25-source/include/asm-x86/system.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/system.h +++ linux-2.6.25-source/include/asm-x86/system.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -251,8 +252,22 @@ #define write_cr0(x) (native_write_cr0(x)) #define read_cr2() 
(native_read_cr2()) #define write_cr2(x) (native_write_cr2(x)) +#ifdef CONFIG_COOPERATIVE + +static inline unsigned long read_cr3(void) +{ + return CO_P_TO_PP(native_read_cr3()); +} + +static inline void write_cr3(unsigned long val) +{ + native_write_cr3(CO_PP_TO_P(val)); +} + +#else #define read_cr3() (native_read_cr3()) #define write_cr3(x) (native_write_cr3(x)) +#endif #define read_cr4() (native_read_cr4()) #define read_cr4_safe() (native_read_cr4_safe()) #define write_cr4(x) (native_write_cr4(x)) Index: linux-2.6.25-source/include/asm-x86/tlbflush.h =================================================================== --- linux-2.6.25-source.orig/include/asm-x86/tlbflush.h +++ linux-2.6.25-source/include/asm-x86/tlbflush.h @@ -17,7 +17,11 @@ static inline void __native_flush_tlb(void) { +#ifdef CONFIG_COOPERATIVE + native_write_cr3(native_read_cr3()); +#else write_cr3(read_cr3()); +#endif } static inline void __native_flush_tlb_global(void) Index: linux-2.6.25-source/arch/x86/power/hibernate_32.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/power/hibernate_32.c +++ linux-2.6.25-source/arch/x86/power/hibernate_32.c @@ -64,7 +64,7 @@ if (!page_table) return NULL; - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(page_table)) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); Index: linux-2.6.25-source/arch/x86/kernel/i8237.c =================================================================== --- linux-2.6.25-source.orig/arch/x86/kernel/i8237.c +++ linux-2.6.25-source/arch/x86/kernel/i8237.c @@ -23,6 +23,7 @@ static int i8237A_resume(struct sys_device *dev) { +#ifndef CONFIG_COOPERATIVE unsigned long flags; int i; @@ -41,6 +42,7 @@ enable_dma(4); release_dma_lock(flags); +#endif return 0; } Index: linux-2.6.25-source/arch/x86/kvm/Kconfig =================================================================== --- 
linux-2.6.25-source.orig/arch/x86/kvm/Kconfig +++ linux-2.6.25-source/arch/x86/kvm/Kconfig @@ -6,7 +6,7 @@ menuconfig VIRTUALIZATION bool "Virtualization" - depends on HAVE_KVM || X86 + depends on (HAVE_KVM || X86) && !COOPERATIVE default y ---help--- Say Y here to get to see options for using your Linux host to run other Index: linux-2.6.25-source/kernel/irq/manage.c =================================================================== --- linux-2.6.25-source.orig/kernel/irq/manage.c +++ linux-2.6.25-source/kernel/irq/manage.c @@ -523,7 +523,7 @@ struct irqaction *action; int retval; -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_COOPERATIVE) /* * Lockdep wants atomic interrupt handlers: */