/* * Copyright 2010 Stefan Lankes, Chair for Operating Systems, * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This file is part of MetalSVM. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CONFIG_ROCKCREEK #include #endif void start_tickless(void); void end_tickless(void); #if defined(CONFIG_ROCKCREEK) && (MAX_CORES > 1) #error RockCreek is not a SMP system #endif /* * Note that linker symbols are not variables, they have no memory allocated for * maintaining a value, rather their address is their value. */ extern const void kernel_start; extern const void kernel_end; // IO APIC MMIO structure: write reg, then read or write data. typedef struct { uint32_t reg; uint32_t pad[3]; uint32_t data; } ioapic_t; static const apic_processor_entry_t* apic_processors[MAX_CORES] = {[0 ... MAX_CORES-1] = NULL}; static uint32_t boot_processor = MAX_CORES; apic_mp_t* apic_mp __attribute__ ((section (".data"))) = NULL; static apic_config_table_t* apic_config = NULL; static size_t lapic = 0; static volatile ioapic_t* ioapic = NULL; static uint32_t icr = 0; static uint32_t ncores = 1; static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; #if MAX_CORES > 1 static uint8_t boot_code[] = { 0xFA, 0x0F, 0x01, 0x16, 0x3B, 0x70, 0x0F, 0x20, 0xC0, 0x0C, 0x01, 0x0F, 0x22, 0xC0, 0x66, 0xEA, 0x16, 0x70, 0x00, 0x00, 0x08, 0x00, 0x31, 0xC0, 0x66, 0xB8, 0x10, 0x00, 0x8E, 0xD8, 0x8E, 0xC0, 0x8E, 0xE0, 0x8E, 0xE8, 0x8E, 0xD0, 0xBC, 0xEF, 0xBE, 0xAD, 0xDE, 0x68, 0xAD, 0xDE, 0xAD, 0xDE, 0x6A, 0x00, 0xEA, 0xDE, 0xC0, 0xAD, 0xDE, 0x08, 0x00, 0xEB, 0xFE, 0x17, 0x00, 0x41, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9A, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x92, 0xCF, 0x00}; atomic_int32_t cpu_online = ATOMIC_INIT(1); #endif static uint8_t initialized = 0; spinlock_t bootlock = SPINLOCK_INIT; // forward declaration static int lapic_reset(void); static uint32_t lapic_read_default(uint32_t addr) { return *((volatile uint32_t*) (lapic+addr)); } static uint32_t lapic_read_msr(uint32_t addr) { return rdmsr(0x800 + (addr >> 4)); } typedef uint32_t (*lapic_read_func)(uint32_t addr); static lapic_read_func lapic_read = lapic_read_default; static void lapic_write_default(uint32_t addr, uint32_t value) { #ifdef CONFIG_X86_32 /* * to avoid a pentium bug, we have to read a apic register * before we write a value to this register */ asm volatile ("movl (%%eax), %%edx; movl %%ebx, (%%eax)" :: "a"(lapic+addr), "b"(value) : "%edx"); #else *((volatile uint32_t*) (lapic+addr)) = value; #endif } static void lapic_write_msr(uint32_t addr, uint32_t value) { wrmsr(0x800 + (addr >> 4), value); } typedef void (*lapic_write_func)(uint32_t addr, uint32_t value); static lapic_write_func lapic_write = lapic_write_default; static inline uint32_t ioapic_read(uint32_t reg) { ioapic->reg = reg; return ioapic->data; } static inline void ioapic_write(uint32_t reg, uint32_t value) { ioapic->reg = reg; ioapic->data = value; } static inline uint32_t ioapic_version(void) { if (ioapic) return ioapic_read(IOAPIC_REG_VER) & 0xFF; return 0; } static inline uint32_t ioapic_max_redirection_entry(void) { if (ioapic) return (ioapic_read(IOAPIC_REG_VER) >> 16) & 0xFF; return 0; } /* * Send a 'End of Interrupt' command to the APIC */ void apic_eoi(void) { if (BUILTIN_EXPECT(lapic, 1)) lapic_write(APIC_EOI, 0); } uint32_t apic_cpu_id(void) { if (lapic && initialized) return ((lapic_read(APIC_ID)) >> 24); return 0; } static inline void apic_set_cpu_id(uint32_t id) { if (lapic && initialized) lapic_write(APIC_ID, id << 24); } static inline uint32_t apic_version(void) { if (lapic) return lapic_read(APIC_VERSION) & 0xFF; return 0; } static inline uint32_t apic_lvt_entries(void) { if (lapic) return (lapic_read(APIC_VERSION) >> 16) & 0xFF; return 0; } int apic_is_enabled(void) { return (lapic && initialized); } int apic_disable_timer(void) { if (BUILTIN_EXPECT(!apic_is_enabled(), 0)) return -EINVAL; lapic_write(APIC_LVT_T, 0x10000); // disable timer interrupt start_tickless(); return 0; } int apic_enable_timer(void) { if (BUILTIN_EXPECT(apic_is_enabled() && icr, 1)) { lapic_write(APIC_DCR, 0xB); // set it to 1 clock increments lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it lapic_write(APIC_ICR, icr); end_tickless(); return 0; } return -EINVAL; } #if MAX_CORES > 1 static inline void set_ipi_dest(uint32_t cpu_id) { uint32_t tmp; tmp = lapic_read(APIC_ICR2); tmp &= 0x00FFFFFF; tmp |= (cpu_id << 24); lapic_write(APIC_ICR2, tmp); } int ipi_tlb_flush(void) { uint32_t flags; uint32_t i, j; if (atomic_int32_read(&cpu_online) == 1) return 0; if (lapic_read(APIC_ICR1) & APIC_ICR_BUSY) { kputs("ERROR: previous send not complete"); return -EIO; } flags = irq_nested_disable(); if (atomic_int32_read(&cpu_online) == ncores) { lapic_write(APIC_ICR1, APIC_INT_ASSERT|APIC_DEST_ALLBUT|APIC_DM_FIXED|124); j = 0; while((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) && (j < 1000)) j++; // wait for it to finish, give up eventualy tho } else { for(i=0; i> 4; *((volatile unsigned short *) reset_vector) = 0x00; if (lapic_read(APIC_ICR1) & APIC_ICR_BUSY) { kputs("ERROR: previous send not complete"); return -EIO; } //kputs("Send IPI\n"); // send out INIT to AP if (has_x2apic()) { uint64_t dest = ((uint64_t)id << 32); wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_INT_LEVELTRIG|APIC_INT_ASSERT|APIC_DM_INIT); udelay(200); // reset INIT wrmsr(0x800 + (APIC_ICR1 >> 4), APIC_INT_LEVELTRIG|APIC_DM_INIT); udelay(10000); // send out the startup wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_DM_STARTUP|(start_eip >> 12)); udelay(200); // do it again wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_DM_STARTUP|(start_eip >> 12)); udelay(200); } else { set_ipi_dest(id); lapic_write(APIC_ICR1, APIC_INT_LEVELTRIG|APIC_INT_ASSERT|APIC_DM_INIT); udelay(200); // reset INIT lapic_write(APIC_ICR1, APIC_INT_LEVELTRIG|APIC_DM_INIT); udelay(10000); // send out the startup set_ipi_dest(id); lapic_write(APIC_ICR1, APIC_DM_STARTUP|(start_eip >> 12)); udelay(200); // do it again set_ipi_dest(id); lapic_write(APIC_ICR1, APIC_DM_STARTUP|(start_eip >> 12)); udelay(200); } //kputs("IPI done...\n"); i = 0; while((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) && (i < 1000)) i++; // wait for it to finish, give up eventualy tho return ((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) ? -EIO : 0); // did it fail (still delivering) or succeed ? } /* * This is defined in entry.asm. We use this to properly reload * the new segment registers */ extern void gdt_flush(void); /* * This is defined in entry.asm and initialized the processors. */ extern void cpu_init(void); /* * platform independent entry point of the application processors */ extern int smp_main(void); #ifdef CONFIG_X86_64 /* * 32bit entry point, which jumps to the 64bit code smp_start */ extern void smp_entry(void); #endif void smp_start(uint32_t id) { #ifdef CONFIG_X86_32 size_t i; #endif atomic_int32_inc(&cpu_online); if (lapic && has_x2apic()) // enable x2APIC support wrmsr(0x1B, 0xFEE00C00); // reset APIC and set id lapic_reset(); apic_set_cpu_id(id); kprintf("Application processor %d is entering its idle task\n", apic_cpu_id()); #ifdef CONFIG_X86_32 // initialization for x86_64 is done in smp_entry() cpu_init(); #endif // use the same gdt like the boot processors gdt_flush(); // install IDT idt_install(); // On 64bit system, paging is already enabled #ifdef CONFIG_X86_32 /* enable paging */ write_cr3((size_t)get_boot_pgd()); i = read_cr0(); i = i | (1 << 31); write_cr0(i); // reset APIC and set id lapic_reset(); // sets also the timer interrupt apic_set_cpu_id(id); #endif /* * we turned on paging * => now, we are able to register our task */ register_task(); // enable additional cpu features cpu_detection(); kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); smp_main(); // idle loop while(1) ; } #endif #ifdef CONFIG_X86_32 static apic_mp_t* search_apic(size_t base, size_t limit) { size_t ptr; apic_mp_t* tmp; for (ptr=base; ptr<=limit-sizeof(uint32_t); ptr++) { tmp = (apic_mp_t*) ptr; if (tmp->signature == MP_FLT_SIGNATURE) { if (!((tmp->version > 4) || tmp->features[0])) return tmp; } } return NULL; } #endif #if MAX_CORES > 1 int smp_init(void) { uint32_t i, j; char* bootaddr; int err; if (ncores <= 1) return -EINVAL; kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); for(i=1; (i= 4) lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt if (max_lvt >= 5) lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt lapic_write(APIC_LINT0, 0x7C); // connect LINT0 to idt entry 124 lapic_write(APIC_LINT1, 0x7D); // connect LINT1 to idt entry 125 lapic_write(APIC_LVT_ER, 0x7E); // connect error to idt entry 126 return 0; } int map_apic(void) { uint32_t i; if (!lapic) return -ENXIO; #ifdef CONFIG_X86_32 lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE); if (BUILTIN_EXPECT(!lapic, 0)) return -ENXIO; #else if (lapic != (size_t)&kernel_start - 0x1000) { kprintf("Upps! Kernel has to remap LAPIC!\n"); lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE); if (BUILTIN_EXPECT(!lapic, 0)) return -ENXIO; } #endif kprintf("Mapped LAPIC at 0x%x\n", lapic); if (ioapic) { size_t old = 0; ioapic = (ioapic_t*) map_region(0 /*(size_t)ioapic*/, (size_t) ioapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE); kprintf("Mapped IOAPIC at 0x%x\n", ioapic); // map all processor entries for(i=0; i Therefore, we disable the PIC outportb(0xA1, 0xFF); outportb(0x21, 0xFF); #else /* * On the SCC, we already know the processor frequency * and possess no PIC timer. Therfore, we use the rdtsc to * to calibrate the APIC timer. */ flags = irq_nested_disable(); lapic_write(APIC_DCR, 0xB); // set it to 1 clock increments lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it lapic_write(APIC_ICR, 0xFFFFFFFFUL); /* wait 3 time slices to determine a ICR */ rmb(); start = rdtsc(); do { rmb(); end = rdtsc(); ticks = end > start ? end - start : start - end; } while(ticks*TIMER_FREQ < 3*RC_REFCLOCKMHZ*1000000UL); icr = (0xFFFFFFFFUL - lapic_read(APIC_CCR)) / 3; lapic_reset(); irq_nested_enable(flags); #endif kprintf("APIC calibration determines an ICR of 0x%x\n", icr); flags = irq_nested_disable(); if (ioapic) { uint32_t max_entry = ioapic_max_redirection_entry(); // now lets turn everything else on for(i=0; i<=max_entry; i++) if (i != 2) ioapic_inton(i, apic_processors[boot_processor]->id); // now, we don't longer need the IOAPIC timer and turn it off ioapic_intoff(2, apic_processors[boot_processor]->id); } initialized = 1; #if MAX_CORES > 1 smp_init(); #endif irq_nested_enable(flags); return 0; } static int apic_probe(void) { size_t addr; uint32_t i, count; int isa_bus = -1; #ifdef CONFIG_X86_32 #if 1 apic_mp = search_apic(0xF0000, 0x100000); if (apic_mp) goto found_mp; apic_mp = search_apic(0x9F000, 0xA0000); if (apic_mp) goto found_mp; #else // searching MP signature in the reserved memory areas if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); while (mmap < mmap_end) { if (mmap->type == MULTIBOOT_MEMORY_RESERVED) { addr = mmap->addr; /* * MultiProcessor Specification 1.4: * ================================= * The following is a list of the suggested memory spaces for the MP configuration table: * a. In the first kilobyte of Extended BIOS Data Area (EBDA), or * b. Within the last kilobyte of system base memory if the EBDA segment is undefined, or * c. At the top of system physical memory, or * d. In the BIOS read-only memory space between 0E0000h and 0FFFFFh. */ for(i=0; (ilen-sizeof(uint32_t)) && (addr < 0x0FFFFF); i++, addr++) { if (*((uint32_t*) addr) == MP_FLT_SIGNATURE) { apic_mp = (apic_mp_t*) addr; if (!((apic_mp->version > 4) || apic_mp->features[0])) goto found_mp; } } } mmap++; } } #endif #endif found_mp: if (!apic_mp) goto no_mp; kprintf("Found MP config table at 0x%x\n", apic_mp); kprintf("System uses Multiprocessing Specification 1.%u\n", apic_mp->version); kprintf("MP features 1: %u\n", apic_mp->features[0]); if (apic_mp->features[0]) { kputs("Currently, MetalSVM supports only multiprocessing via the MP config tables!\n"); goto no_mp; } apic_config = (apic_config_table_t*) ((size_t) apic_mp->mp_config); if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) !=0) { kputs("Invalid MP config table\n"); goto no_mp; } addr = (size_t) apic_config; addr += sizeof(apic_config_table_t); if (addr % 4) addr += 4 - addr % 4; // search the ISA bus => required to redirect the IRQs for(i=0; ientry_count; i++) { switch(*((uint8_t*) addr)) { case 0: addr += 20; break; case 1: { apic_bus_entry_t* mp_bus; mp_bus = (apic_bus_entry_t*) addr; if (mp_bus->name[0] == 'I' && mp_bus->name[1] == 'S' && mp_bus->name[2] == 'A') isa_bus = i; } default: addr += 8; } } addr = (size_t) apic_config; addr += sizeof(apic_config_table_t); if (addr % 4) addr += 4 - addr % 4; for(i=0, count=0; ientry_count; i++) { if (*((uint8_t*) addr) == 0) { // cpu entry if (i < MAX_CORES) { apic_processors[i] = (apic_processor_entry_t*) addr; if (!(apic_processors[i]->cpu_flags & 0x01)) // is the processor usable? apic_processors[i] = NULL; else if (apic_processors[i]->cpu_flags & 0x02) boot_processor = i; } count++; addr += 20; } else if (*((uint8_t*) addr) == 2) { // IO_APIC apic_io_entry_t* io_entry = (apic_io_entry_t*) addr; ioapic = (ioapic_t*) ((size_t) io_entry->addr); addr += 8; kprintf("Found IOAPIC at 0x%x\n", ioapic); } else if (*((uint8_t*) addr) == 3) { // IO_INT apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr; if (extint->src_bus == isa_bus) { irq_redirect[extint->src_irq] = extint->dest_intin; kprintf("Redirect irq %u -> %u\n", extint->src_irq, extint->dest_intin); } addr += 8; } else addr += 8; } kprintf("Found %u cores\n", count); if (count > MAX_CORES) { kputs("Found too many cores! Increase the macro MAX_CORES!\n"); goto no_mp; } ncores = count; check_lapic: if (apic_config) lapic = apic_config->lapic; else if (has_apic()) lapic = 0xFEE00000; if (!lapic) goto out; if (has_x2apic()) { kprintf("Enable X2APIC support!\n"); wrmsr(0x1B, lapic | 0xD00); lapic_read = lapic_read_msr; lapic_write = lapic_write_msr; } kprintf("Found APIC at 0x%x\n", lapic); #ifdef CONFIG_X86_64 // On a x64 system, we already map the lapic below the kernel lapic = (size_t)&kernel_start - 0x1000; #endif kprintf("Maximum LVT Entry: 0x%x\n", apic_lvt_entries()); kprintf("APIC Version: 0x%x\n", apic_version()); if (!((apic_version() >> 4))) { kprintf("Currently, MetalSVM didn't supports extern APICs!\n"); goto out; } if (apic_lvt_entries() < 3) { kprintf("LVT is too small\n"); goto out; } return 0; out: apic_mp = NULL; apic_config = NULL; lapic = 0; ncores = 1; return -ENXIO; no_mp: apic_mp = NULL; apic_config = NULL; ncores = 1; goto check_lapic; } #if MAX_CORES > 1 static void apic_tlb_handler(struct state *s) { uint32_t val = read_cr3(); if (val) write_cr3(val); } #endif static void apic_err_handler(struct state *s) { kprintf("Got APIC error 0x%x\n", lapic_read(APIC_ESR)); } int apic_init(void) { int ret; ret = apic_probe(); if (ret) return ret; // set APIC error handler irq_install_handler(126, apic_err_handler); #if MAX_CORES > 1 irq_install_handler(124, apic_tlb_handler); #endif kprintf("Boot processor %u (ID %u)\n", boot_processor, apic_processors[boot_processor]->id); #if 0 // initialize local apic ret = lapic_reset(); if (ret) return ret; if (ioapic) { uint32_t i, max_entry = ioapic_max_redirection_entry(); // now lets turn everything else off for(i=0; i<=max_entry; i++) if (i != 2) ioapic_intoff(i, apic_processors[boot_processor]->id); // enable timer interrupt ioapic_inton(2, apic_processors[boot_processor]->id); } #endif return 0; } int ioapic_inton(uint8_t irq, uint8_t apicid) { ioapic_route_t route; uint32_t off; if (BUILTIN_EXPECT(irq > 24, 0)){ kprintf("IOAPIC: trying to turn on irq %i which is too high\n", irq); return -EINVAL; } if (irq < 16) off = irq_redirect[irq]*2; else off = irq*2; #if 0 route.lower.whole = ioapic_read(IOAPIC_REG_TABLE+1+off); route.dest.upper = ioapic_read(IOAPIC_REG_TABLE+off); route.lower.bitfield.mask = 0; // turn it on (stop masking) #else route.lower.bitfield.dest_mode = 0; route.lower.bitfield.mask = 0; route.dest.physical.physical_dest = apicid; // send to the boot processor route.lower.bitfield.delivery_mode = 0; route.lower.bitfield.polarity = 0; route.lower.bitfield.trigger = 0; route.lower.bitfield.vector = 0x20+irq; route.lower.bitfield.mask = 0; // turn it on (stop masking) #endif ioapic_write(IOAPIC_REG_TABLE+off, route.lower.whole); ioapic_write(IOAPIC_REG_TABLE+1+off, route.dest.upper); route.dest.upper = ioapic_read(IOAPIC_REG_TABLE+1+off); route.lower.whole = ioapic_read(IOAPIC_REG_TABLE+off); return 0; } int ioapic_intoff(uint8_t irq, uint8_t apicid) { ioapic_route_t route; uint32_t off; if (BUILTIN_EXPECT(irq > 24, 0)){ kprintf("IOAPIC: trying to turn on irq %i which is too high\n", irq); return -EINVAL; } if (irq < 16) off = irq_redirect[irq]*2; else off = irq*2; #if 0 route.lower.whole = ioapic_read(IOAPIC_REG_TABLE+1+off); route.dest.upper = ioapic_read(IOAPIC_REG_TABLE+off); route.lower.bitfield.mask = 1; // turn it off (start masking) #else route.lower.bitfield.dest_mode = 0; route.lower.bitfield.mask = 0; route.dest.physical.physical_dest = apicid; route.lower.bitfield.delivery_mode = 0; route.lower.bitfield.polarity = 0; route.lower.bitfield.trigger = 0; route.lower.bitfield.vector = 0x20+irq; route.lower.bitfield.mask = 1; // turn it off (start masking) #endif ioapic_write(IOAPIC_REG_TABLE+off, route.lower.whole); ioapic_write(IOAPIC_REG_TABLE+1+off, route.dest.upper); return 0; }