I've had success in V2 on the EPIA-SP by pulling the ASM code from V1 and using it to replace the whole early_mtrr_init() call from V2, placed right at the start of auto.c (see below). The romcc-generated code works fast enough, but this change really makes a difference later, when the Linux kernel is uncompressed. I don't really know what is happening, though; I'm not an expert on low-level cache setup. There is a difference between the V2 early_mtrr_init() functionality and what was in V1.
/*
 * Early MTRR / cache setup as a single inline-assembly statement
 * (x86, AT&T syntax).
 *
 * Sequence, as written below:
 *   1. invd, then write 0 to every MSR index listed in the tables at
 *      fixed_mtrr_msr / var_mtrr_msr (0x250..0x26F subset, 0x200..0x20F).
 *   2. Write 0x06060606:0x06060606 to MSRs 0x250 and 0x258.
 *   3. Program MSR pair 0x200/0x201.
 *   4. Write 0x00000c00 to MSR 0x2ff.
 *   5. Clear bits 29 and 30 of CR0.
 *
 * MSR names and bit meanings in the comments below follow the MTRR layout
 * in the Intel SDM; NOTE(review): confirm they apply unchanged to the
 * target CPU.
 *
 * NOTE(review): the statement has no operand or clobber lists although it
 * overwrites EAX, ECX, EDX and ESI - confirm the compiler in use treats
 * that safely at this call site.
 * NOTE(review): the doubled "%%" register prefixes presumably rely on the
 * compiler collapsing them to "%" in the template - confirm for the
 * toolchain in use.
 * NOTE(review): all labels are ordinary (non-local) assembler labels, so
 * this statement presumably can only be emitted once per object file.
 */
__asm__ volatile (
	"\n"
	/* Invalidate the caches without writing them back. */
	" invd\n"
	"earlymtrr_start:\n"
	/* EDX:EAX = 0; EDX stays 0 for every wrmsr in the clearing loop. */
	" xorl %%eax, %%eax\n"
	" xorl %%edx, %%edx\n"
	/* ESI walks the MSR index table defined further down. */
	" movl $fixed_mtrr_msr, %%esi\n"

	/*
	 * Loop: fetch the next 32-bit table entry; a zero entry ends the
	 * loop, any other value is used as the MSR index and that MSR is
	 * written with 0. There is no terminator between fixed_mtrr_msr and
	 * var_mtrr_msr, so one pass covers both tables.
	 * Assumes the direction flag is clear so lodsl advances ESI - TODO
	 * confirm.
	 */
	"clear_fixed_var_mtrr:\n"
	" lodsl (%%esi), %%eax\n"
	" testl %%eax, %%eax\n"
	" jz clear_fixed_var_mtrr_out\n"
	" movl %%eax, %%ecx\n"
	" xorl %%eax, %%eax\n"
	" wrmsr\n"
	" jmp clear_fixed_var_mtrr\n"
	"clear_fixed_var_mtrr_out:\n"

	/*
	 * Fixed-range MTRRs 0x250 and 0x258: every byte-wide type field is
	 * set to 0x06 (write-back per the SDM encoding). The rdmsr results
	 * are discarded because both EAX and EDX are overwritten before the
	 * wrmsr. In the SDM layout these two MSRs cover 0x00000-0x7FFFF and
	 * 0x80000-0x9FFFF.
	 */
	"set_fixed_mtrr:\n"
	" movl $0x250, %%ecx\n"
	" rdmsr\n"
	" movl $0x06060606, %%edx\n"
	" movl $0x06060606, %%eax\n"
	" wrmsr\n"
	" movl $0x258, %%ecx\n"
	" rdmsr\n"
	" movl $0x06060606, %%edx\n"
	" movl $0x06060606, %%eax\n"
	" wrmsr\n"

	/*
	 * Variable-range MTRR 0, base register (0x200), read-modify-write:
	 * high dword keeps bits 4..31 and clears bits 0..3 (the orl with 0
	 * is a no-op); low dword keeps only bits 8..11 and sets the low bits
	 * to 6. With the register cleared by the loop above this yields
	 * base 0, type 6 (write-back).
	 */
	"set_var_mtrr:\n"
	" movl $0x200, %%ecx\n"
	" rdmsr\n"
	" andl $0xfffffff0, %%edx\n"
	" orl $0x00000000, %%edx\n"
	" andl $0x00000f00, %%eax\n"
	" orl $0x00000006, %%eax\n"
	" wrmsr\n"
	/*
	 * Variable-range MTRR 0, mask register (0x201): high dword low
	 * nibble forced to 0xf; low dword keeps bits 0..10 and ORs in
	 * 0xf0000800, i.e. address-mask bits 28..35 plus bit 11 (the valid
	 * bit in the SDM layout). Together with base 0 that describes a
	 * 256 MiB range starting at address 0.
	 */
	" movl $0x201, %%ecx\n"
	" rdmsr\n"
	" andl $0xfffffff0, %%edx\n"
	" orl $0x0000000f, %%edx\n"
	" andl $0x000007ff, %%eax\n"
	" orl $0xf0000800, %%eax\n"
	" wrmsr\n"

	/*
	 * MSR 0x2ff (MTRR default type in the SDM) = 0x00000c00: bits 10 and
	 * 11 set (fixed-range enable and MTRR enable), default type field 0
	 * (uncacheable).
	 */
	"enable_mtrr:\n"
	" movl $0x2ff, %%ecx\n"
	" xorl %%edx, %%edx\n"
	" movl $0x00000c00, %%eax\n"
	" wrmsr\n"
	/* Clear CR0 bits 30 (CD) and 29 (NW) to turn caching on. */
	" movl %%cr0, %%eax\n"
	" andl $0x9fffffff,%%eax\n"
	" movl %%eax, %%cr0\n"
	/* Jump over the data tables embedded in the instruction stream. */
	" jmp earlymtrr_end\n"

	/* MSR indices zeroed by the loop above: fixed-range MTRRs first ... */
	"fixed_mtrr_msr:\n"
	" .long 0x250, 0x258, 0x259\n"
	" .long 0x268, 0x269, 0x26A\n"
	" .long 0x26B, 0x26C, 0x26D\n"
	" .long 0x26E, 0x26F\n"
	/* ... then variable-range base/mask MSRs 0x200..0x20F. */
	"var_mtrr_msr:\n"
	" .long 0x200, 0x201, 0x202, 0x203\n"
	" .long 0x204, 0x205, 0x206, 0x207\n"
	" .long 0x208, 0x209, 0x20A, 0x20B\n"
	" .long 0x20C, 0x20D, 0x20E, 0x20F\n"
	/* Zero entry terminates the clearing loop. */
	" .long 0x000\n"
	"earlymtrr_end:\n"
);