Hi all,
So far Milax crashes a few seconds into executing the OpenSolaris kernel, and I'm wondering if the bug is related to OpenBIOS not preserving registers in its MMU routines. The instruction causing the crash is the following:
0x0000000001047444: ldx [ %g7 + 0xa8 ], %o2
Given that there were several instances of this exact instruction in previous nearby blocks of code, I used the qemu debugging output to see what was happening in this particular case. Comments are denoted with a ^.
IN: 0x0000000001050860: call 0x1050ee8 0x0000000001050864: add %fp, 0x7a7, %o0
^ Looks like a standard branch. %g7 is currently set to 0x180e000
-------------- IN: mmu_translate 0x00000000ffd0a788: save %sp, -224, %sp 0x00000000ffd0a78c: call 0xffd0a830 0x00000000ffd0a790: nop
^ Hmmm. But we've now invoked mmu_translate in OpenBIOS?
-------------- IN: mmu_translate 0x00000000ffd0a794: mov %o0, %g1 0x00000000ffd0a798: stx %g1, [ %fp + 0x7df ] 0x00000000ffd0a79c: add %fp, 0x7d7, %g1 0x00000000ffd0a7a0: ldx [ %fp + 0x7df ], %o0 0x00000000ffd0a7a4: mov %g1, %o1 0x00000000ffd0a7a8: call 0xffd1e134 0x00000000ffd0a7ac: nop
-------------- IN: ofmem_translate 0x00000000ffd1e134: save %sp, -240, %sp 0x00000000ffd1e138: stx %i0, [ %fp + 0x87f ] 0x00000000ffd1e13c: stx %i1, [ %fp + 0x887 ] 0x00000000ffd1e140: call 0xffd0e37c 0x00000000ffd1e144: nop
-------------- IN: ofmem_translate 0x00000000ffd1e148: mov %o0, %g1 0x00000000ffd1e14c: stx %g1, [ %fp + 0x7d7 ] 0x00000000ffd1e150: ldx [ %fp + 0x7d7 ], %g1 0x00000000ffd1e154: ldx [ %g1 + 0x28 ], %g1 0x00000000ffd1e158: stx %g1, [ %fp + 0x7df ] 0x00000000ffd1e15c: b %xcc, 0xffd1e1d8 0x00000000ffd1e160: nop
-------------- IN: ofmem_translate 0x00000000ffd1e1d8: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1dc: brz %g1, 0xffd1e1fc 0x00000000ffd1e1e0: nop
-------------- IN: ofmem_translate 0x00000000ffd1e1e4: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1e8: ldx [ %g1 + 8 ], %g2 0x00000000ffd1e1ec: ldx [ %fp + 0x87f ], %g1 0x00000000ffd1e1f0: cmp %g2, %g1 0x00000000ffd1e1f4: bleu %xcc, 0xffd1e164 0x00000000ffd1e1f8: nop
-------------- IN: ofmem_translate 0x00000000ffd1e164: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e168: ldx [ %g1 + 8 ], %g2 0x00000000ffd1e16c: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e170: ldx [ %g1 + 0x10 ], %g1 0x00000000ffd1e174: add %g2, %g1, %g1 0x00000000ffd1e178: add %g1, -1, %g2 0x00000000ffd1e17c: ldx [ %fp + 0x87f ], %g1 0x00000000ffd1e180: cmp %g2, %g1 0x00000000ffd1e184: bcs %xcc, 0xffd1e1cc 0x00000000ffd1e188: nop
-------------- IN: ofmem_translate 0x00000000ffd1e1cc: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1d0: ldx [ %g1 ], %g1 0x00000000ffd1e1d4: stx %g1, [ %fp + 0x7df ] 0x00000000ffd1e1d8: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1dc: brz %g1, 0xffd1e1fc 0x00000000ffd1e1e0: nop
-------------- IN: ofmem_translate 0x00000000ffd1e18c: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e190: ldx [ %g1 + 8 ], %g2 0x00000000ffd1e194: ldx [ %fp + 0x87f ], %g1 0x00000000ffd1e198: sub %g1, %g2, %g1 0x00000000ffd1e19c: stx %g1, [ %fp + 0x7e7 ] 0x00000000ffd1e1a0: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1a4: ldx [ %g1 + 0x20 ], %g2 0x00000000ffd1e1a8: ldx [ %fp + 0x887 ], %g1 0x00000000ffd1e1ac: stx %g2, [ %g1 ] 0x00000000ffd1e1b0: ldx [ %fp + 0x7df ], %g1 0x00000000ffd1e1b4: ldx [ %g1 + 0x18 ], %g2 0x00000000ffd1e1b8: ldx [ %fp + 0x7e7 ], %g1 0x00000000ffd1e1bc: add %g2, %g1, %g1 0x00000000ffd1e1c0: stx %g1, [ %fp + 0x7c7 ] 0x00000000ffd1e1c4: b %xcc, 0xffd1e204 0x00000000ffd1e1c8: nop
-------------- IN: ofmem_translate 0x00000000ffd1e204: ldx [ %fp + 0x7c7 ], %g1 0x00000000ffd1e208: mov %g1, %i0 0x00000000ffd1e20c: rett %i7 + 8 0x00000000ffd1e210: nop
-------------- IN: mmu_translate 0x00000000ffd0a7b0: mov %o0, %g1 0x00000000ffd0a7b4: stx %g1, [ %fp + 0x7e7 ] 0x00000000ffd0a7b8: ldx [ %fp + 0x7e7 ], %g1 0x00000000ffd0a7bc: cmp %g1, -1 0x00000000ffd0a7c0: be %xcc, 0xffd0a81c 0x00000000ffd0a7c4: nop
-------------- IN: mmu_translate 0x00000000ffd0a7c8: ldx [ %fp + 0x7e7 ], %g2 0x00000000ffd0a7cc: mov -1, %g1 0x00000000ffd0a7d0: srlx %g1, 0x20, %g1 0x00000000ffd0a7d4: and %g2, %g1, %g1 0x00000000ffd0a7d8: mov %g1, %o0 0x00000000ffd0a7dc: call 0xffd0a4f8 0x00000000ffd0a7e0: nop
-------------- IN: mmu_translate 0x00000000ffd0a7e4: ldx [ %fp + 0x7e7 ], %g1 0x00000000ffd0a7e8: srlx %g1, 0x20, %g1 0x00000000ffd0a7ec: mov %g1, %o0 0x00000000ffd0a7f0: call 0xffd0a4f8 0x00000000ffd0a7f4: nop
-------------- IN: mmu_translate 0x00000000ffd0a7f8: ldx [ %fp + 0x7d7 ], %g1 0x00000000ffd0a7fc: mov %g1, %o0 0x00000000ffd0a800: call 0xffd0a4f8 0x00000000ffd0a804: nop
-------------- IN: mmu_translate 0x00000000ffd0a808: mov -1, %o0 ! 0xffffffffffffffff 0x00000000ffd0a80c: call 0xffd0a4f8 0x00000000ffd0a810: nop
-------------- IN: mmu_translate 0x00000000ffd0a814: b %xcc, 0xffd0a828 0x00000000ffd0a818: nop
-------------- IN: mmu_translate 0x00000000ffd0a828: rett %i7 + 8 0x00000000ffd0a82c: nop
-------------- IN: 0x0000000001007d54: restore %o0, %g0, %o0
-------------- IN: 0x0000000001050868: call 0x104cf7c 0x000000000105086c: mov %o0, %i5
^ Returned from the mmu_translate block. %g7 is now 0x4c23549c (it was 0x180e000 before the call).
-------------- IN: 0x0000000001014c84: sethi %hi(0x181c800), %o5 0x0000000001014c88: mov %o7, %g1 0x0000000001014c8c: ld [ %o5 + 0x10 ], %o5 0x0000000001014c90: sra %o5, 0, %o0 0x0000000001014c94: call 0x104743c 0x0000000001014c98: mov %g1, %o7
-------------- IN: 0x000000000104743c: rdpr %pil, %o1 0x0000000001047440: wrpr 0xf, %pil 0x0000000001047444: ldx [ %g7 + 0xa8 ], %o2 0x0000000001047448: ld [ %o2 + 0x10c ], %o2 0x000000000104744c: cmp %o2, %o0 0x0000000001047450: movl %xcc, %o0, %o2 0x0000000001047454: wrpr %g0, %o2, %pil 0x0000000001047458: retl 0x000000000104745c: mov %o1, %o0
Search PC... Search PC... Search PC... Search PC... Search PC... Search PC... Search PC... Search PC... Search PC... -------------- IN: 0x000000000104743c: rdpr %pil, %o1 0x0000000001047440: wrpr 0xf, %pil 0x0000000001047444: ldx [ %g7 + 0xa8 ], %o2
^ %g7 + 0xa8 = 0x4c235544, which is not 8-byte aligned, so the 64-bit ldx throws an alignment exception
I'm wondering if the issue here is that the "call 0x1050ee8" instruction is causing an MMU fault, which in turn invokes OpenBIOS's mmu_translate via a trap. Since mmu_translate, unlike the CIF calls, fails to preserve the global registers, any subsequent access through a global register (such as %g7 above) is doomed to fail.
ATB,
Mark.