Index: kernel/include/dict.h
===================================================================
--- kernel/include/dict.h	(revision 590)
+++ kernel/include/dict.h	(working copy)
@@ -18,6 +18,7 @@
 #define DODFR  5
 #define DODOES 6
 
+#define MAXNFALEN 128
 
 /* The header is 28/32 bytes on 32/64bit platforms */
 
@@ -37,6 +38,10 @@
 ucell lfa2nfa(ucell ilfa);
 ucell load_dictionary(const char *data, ucell len);
 void  dump_header(dictionary_header_t *header);
+ucell fstrlen(ucell fstr);
+void fstrncpy(char *dest, ucell src, unsigned int maxlen);
+ucell findsemis(ucell xt);
+ucell findxtfromcell(ucell incell);
 
 /* program counter */
 extern ucell 		PC;
Index: kernel/dict.c
===================================================================
--- kernel/dict.c	(revision 590)
+++ kernel/dict.c	(working copy)
@@ -50,7 +50,7 @@
 
 /* fstrlen - returns length of a forth string. */
 
-static ucell fstrlen(ucell fstr)
+ucell fstrlen(ucell fstr)
 {
 	fstr -= pointer2cell(dict)+1;
 	//fstr -= pointer2cell(dict); FIXME
@@ -78,6 +78,18 @@
 	return 0;
 }
 
+/* fstrncpy - copy at most maxlen-1 bytes of the forth string at src into
+ * dest and NUL-terminate it; nothing is written when maxlen is 0. */
+void fstrncpy(char *dest, ucell src, unsigned int maxlen)
+{
+	ucell len = fstrlen(src);	/* measure once; keep it unsigned */
+	if (maxlen == 0) return;	/* maxlen - 1 below would wrap */
+	if (len >= maxlen) len = maxlen - 1;
+	memcpy(dest, cell2pointer(src), len);
+	dest[len] = '\0';
+}
+
+
 /* findword
  * looks up a given word in the dictionary. This function
  * is used by the c based interpreter and to find the "initialize"
@@ -109,6 +121,69 @@
 }
 
 
+/* findsemis
+ * Given a DOCOL xt, return the address used as the position of the
+ * semis ending that word's definition, or 0 if the xt is not found.
+ * We walk the LFA chain from "last" to the word whose link points at
+ * the wanted word, then count back one cell from that word's NFA.
+ */
+
+ucell findsemis(ucell xt)
+{
+	ucell tmplfa, nextlfa, nextcfa;
+
+	if (!last)
+		return 0;
+
+	tmplfa = read_ucell(last);
+	nextcfa = lfa2cfa(tmplfa);
+
+	/* Catch the special case where the word we want is the
+	 * first one reached from "last"; in that case the end of
+	 * the word is one cell below the dictionary head */
+	if (nextcfa == xt)
+		return pointer2cell(dict) + dicthead - sizeof(cell);
+
+	while (tmplfa) {
+
+		/* Peek ahead and see if the next CFA in the list is the
+		 * one we are searching for */ 
+		nextlfa = read_ucell(cell2pointer(tmplfa)); 
+		nextcfa = lfa2cfa(nextlfa);
+
+		/* If so, count back 1 cell from the NFA of the word we are on */
+		if (nextcfa == xt)
+			return lfa2nfa(tmplfa) - sizeof(cell);
+
+		tmplfa = nextlfa;
+	}
+
+	return 0;
+}
+
+/* findxtfromcell
+ * Map a cell address to a CFA: walk the LFA chain from "last" and take
+ * the first word whose LFA lies below incell. Returns 0 when "last" is
+ * not set or no word in the chain qualifies.
+ */
+
+ucell findxtfromcell(ucell incell)
+{
+	ucell lfa;
+
+	if (!last)
+		return 0;
+
+	for (lfa = read_ucell(last); lfa != 0;
+	     lfa = read_ucell(cell2pointer(lfa))) {
+		/* The first link found below the cell decides the result */
+		if (lfa < incell)
+			return lfa2cfa(lfa);
+	}
+
+	return 0;
+}
+
 void dump_header(dictionary_header_t *header)
 {
 	printk("OpenBIOS dictionary:\n");
Index: kernel/stack.c
===================================================================
--- kernel/stack.c	(revision 590)
+++ kernel/stack.c	(working copy)
@@ -19,6 +19,8 @@
 int rstackcnt = 0;
 cell rstack[rstacksize];
 
+int dbgrstackcnt = 0;	/* rstackcnt as saved by the debugger's "Forth" key */
+
 #if defined(CONFIG_DEBUG_DSTACK) || defined(FCOMPILER)
 void printdstack(void)
 {
Index: kernel/primitives.c
===================================================================
--- kernel/primitives.c	(revision 590)
+++ kernel/primitives.c	(working copy)
@@ -143,4 +143,5 @@
 	sysdebug,		/* sys-debug */
 	do_include,		/* $include */
 	do_encode_file,		/* $encode-file */
+	do_debug_xt,		/* (debug  */
 };
Index: kernel/internal.c
===================================================================
--- kernel/internal.c	(revision 590)
+++ kernel/internal.c	(working copy)
@@ -19,6 +19,27 @@
 ucell PC;
 volatile int runforth = 0;
 
+#define DEBUG_MODE_NONE 0	/* just registered by add_debug_xt() */
+#define DEBUG_MODE_STEP 1	/* prompt for a key before each word */
+#define DEBUG_MODE_TRACE 2	/* print each word without prompting */
+#define DEBUG_MODE_STEPUP 3	/* run freely; handle "Up" at semis */
+
+#define DEBUG_BANNER "\nStepper keys: <space>/<enter> Up Down Trace Rstack Forth\n"
+
+/* Linked list of debug xts; it always ends in the debug_xt_eol entry */
+struct debug_xt {
+	ucell xt_docol;		/* xt passed to add_debug_xt() */
+	ucell xt_semis;		/* findsemis(xt_docol) at registration */
+	int mode;		/* one of the DEBUG_MODE_* values */
+	struct debug_xt *next;	/* NULL only in the terminating entry */
+};
+
+static struct debug_xt debug_xt_eol = { (ucell)0, (ucell)0, 0, NULL};
+static struct debug_xt *debug_xt_list = &debug_xt_eol;
+
+/* Shared buffer that word names are copied into before being printed */
+char xtname[MAXNFALEN];
+
 #ifndef FCOMPILER
 /* instead of pointing to an explicit 0 variable we
  * point behind the pointer.
@@ -40,6 +61,190 @@
 #endif
 
 
+/* Print the data stack as " ( a b c ) ", or " ( Empty ) " when it is empty */
+static void
+display_dbg_dstack ( void )
+{
+	int idx;
+
+	if (dstackcnt == 0) {
+		printk(" ( Empty ) ");
+		return;
+	}
+	printk(" ( ");
+	/* Entries are dstack[1..dstackcnt], separated by single spaces */
+	for (idx = 1; idx <= dstackcnt; idx++) {
+		if (idx > 1)
+			printk(" ");
+		printk("%" FMT_CELL_x, dstack[idx]);
+	}
+	printk(" ) ");
+}
+
+/* Print the return stack on an "R: ( a b c )" line, or " ( Empty ) " if empty */
+static void
+display_dbg_rstack ( void )
+{
+	int idx;
+
+	if (rstackcnt == 0) {
+		printk(" ( Empty ) ");
+		return;
+	}
+	printk("\nR: ( ");
+	/* Entries are rstack[1..rstackcnt], separated by single spaces */
+	for (idx = 1; idx <= rstackcnt; idx++) {
+		if (idx > 1)
+			printk(" ");
+		printk("%" FMT_CELL_x, rstack[idx]);
+	}
+	printk(" ) \n");
+}
+
+/* Mark a colon definition for debugging. Returns 1 if xt is (or already was)
+ * in the debug list, 0 if it is not a DOCOL word or no entry could be allocated */
+static int
+add_debug_xt( ucell xt )
+{
+	struct debug_xt *debug_xt_item;
+
+	/* If the xt CFA isn't DOCOL then issue a warning and do nothing */
+	if (read_ucell(cell2pointer(xt)) != DOCOL) {
+		printk("\nprimitive words cannot be debugged\n");
+		return 0;
+	}
+
+	/* If this xt is already in the list, do nothing but indicate success */
+	for (debug_xt_item = debug_xt_list; debug_xt_item->next != NULL; debug_xt_item = debug_xt_item->next)
+		if (debug_xt_item->xt_docol == xt)
+			return 1;
+
+	/* Allocation can fail; report that instead of dereferencing NULL */
+	debug_xt_item = malloc(sizeof(*debug_xt_item));
+	if (debug_xt_item == NULL)
+		return 0;
+	/* Add to the head of the list. Besides the CFA we also record where the
+	   word ends, as the rstack can be arbitrarily changed by a forth word */
+	debug_xt_item->xt_docol = xt;
+	debug_xt_item->xt_semis = findsemis(xt);
+	debug_xt_item->mode = DEBUG_MODE_NONE;
+	debug_xt_item->next = debug_xt_list;
+	debug_xt_list = debug_xt_item;
+	return 1;
+}
+
+/* Remove xt from the debug list and free its entry; a no-op when xt is
+ * not in the list. The static debug_xt_eol terminator is never unlinked
+ * or freed, so debug_xt_list always stays a valid, terminated list. */
+static void
+del_debug_xt( ucell xt )
+{
+	struct debug_xt **linkp, *victim;
+
+	/* Walk the links (head pointer included) so that removing the first
+	 * entry needs no special case. Stop at the terminator: its next is
+	 * NULL and its xt_docol of 0 must not be taken for a match */
+	for (linkp = &debug_xt_list; (*linkp)->next != NULL; linkp = &(*linkp)->next) {
+		if ((*linkp)->xt_docol != xt)
+			continue;
+
+		/* Unlink before freeing; add_debug_xt() never stores an xt
+		 * twice, so there is nothing further to look for */
+		victim = *linkp;
+		*linkp = victim->next;
+		free(victim);
+		return;
+	}
+}
+
+static void
+do_source_dbg( struct debug_xt *debug_xt_item )
+{
+	/* Forth source debugger: print dstack and the word at PC, then act on a key */
+	char k, done = 0;
+
+	/* Display current dstack */
+	display_dbg_dstack();
+	printk("\n");
+
+	fstrncpy(xtname, lfa2nfa(read_ucell(cell2pointer(PC)) - sizeof(cell)), MAXNFALEN);
+	printk("%p: %s ", cell2pointer(PC), xtname);
+
+	/* In trace mode the line printed above is all we do: no prompt */
+	if (debug_xt_item->mode == DEBUG_MODE_TRACE)
+		return;
+
+	/* Otherwise in step mode, prompt for a keypress */
+	k = getchar();
+
+	/* Keep handling keys until one of them sets done */
+	while (!done)
+	{
+		switch (k) {
+
+			case ' ':
+			case '\n':
+				/* Perform a single step */
+				done = 1;
+				break;
+
+			case 'u':
+			case 'U':
+				/* Up - unmark current word for debug, mark its caller for
+				 * debugging and finish executing current word */ 
+
+				/* Since this word could alter the rstack during its execution,
+				 * we only know the caller when (semis) is called for this xt.
+				 * Hence we mark the xt as a special DEBUG_MODE_STEPUP which
+				 * means we run as normal, but schedule the xt for deletion
+				 * at its corresponding (semis) word when we know the rstack
+				 * will be set to its final parent value */
+				debug_xt_item->mode = DEBUG_MODE_STEPUP;
+				done = 1;
+				break;
+
+			case 'd':
+			case 'D':
+				/* Down - mark the word at PC for debug and step into it */
+				done = add_debug_xt(read_ucell(cell2pointer(PC)));
+				if (!done)
+					k = getchar();	/* not added: wait for another key */
+				break;
+
+			case 't':
+			case 'T':
+				/* Trace mode - later calls return before the prompt */
+				debug_xt_item->mode = DEBUG_MODE_TRACE;
+				done = 1;
+				break;
+
+			case 'r':
+			case 'R':
+				/* Display rstack, then wait for another key */
+				display_dbg_rstack();
+				done = 0;
+				k = getchar();
+				break;
+
+			case 'f':
+			case 'F':
+				/* Start subordinate Forth interpreter (PC less one cell goes on rstack) */
+				PUSHR(PC - sizeof(cell));
+				PC = pointer2cell(findword("outer-interpreter")) + sizeof(ucell);
+
+				/* Save rstack position for when we return */
+				dbgrstackcnt = rstackcnt;
+				done = 1;
+				break;
+
+			default:
+				/* Unrecognised key - show the key help and ask again */
+				printk(DEBUG_BANNER);
+				k = getchar();
+		}
+	}
+}
+
 static inline void processxt(ucell xt)
 {
 	void (*tokenp) (void);
@@ -51,21 +256,93 @@
 
 static void docol(void)
 {				/* DOCOL */
+	struct debug_xt *entry;
+
 	PUSHR(PC);
 	PC = read_ucell(cell2pointer(PC));
 
+	/* Scan the debug list (up to its terminator) for the word just
+	 * entered; if it is marked for debugging, announce it by name */
+	for (entry = debug_xt_list; entry->next; entry = entry->next) {
+		if (entry->xt_docol != PC)
+			continue;
+
+		fstrncpy(xtname, lfa2nfa(PC - sizeof(cell)), MAXNFALEN);
+		printk("\n: %s ", xtname);
+
+		/* Every entry into the word starts out in step mode,
+		 * whatever mode an earlier run left behind */
+		entry->mode = DEBUG_MODE_STEP;
+	}
+
 	dbg_interp_printk("docol: %s\n", cell2pointer( lfa2nfa(PC - sizeof(cell)) ));
 }
 
 static void semis(void)
 {
+	struct debug_xt *debug_xt_item, *debug_xt_up = NULL;
+
+	/* If PC is the recorded semis of a debugged word, report that it has finished */
+	debug_xt_item = debug_xt_list;
+	while (debug_xt_item->next) {
+		if (debug_xt_item->xt_semis == PC) {
+			if (debug_xt_item->mode != DEBUG_MODE_STEPUP) {
+				/* Handle the normal case */
+				fstrncpy(xtname, lfa2nfa(debug_xt_item->xt_docol - sizeof(cell)), MAXNFALEN);
+				printk("\n[ Finished %s ] ", xtname);
+
+				/* Reset to step mode in case we were in trace mode */
+				debug_xt_item->mode = DEBUG_MODE_STEP;
+			} else {
+				/* This word requires execution of the debugger "Up"
+				 * semantics. However we can't do this here since we
+				 * are iterating through the debug list, and we need 
+				 * to change it. So we do it afterwards. 
+				 */ 
+				debug_xt_up = debug_xt_item;	
+			}
+		}
+
+		debug_xt_item = debug_xt_item->next;
+	}
+
+	/* Execute debugger "Up" semantics if required */
+	if (debug_xt_up) {
+		/* Only add the parent word if it is not within the trampoline */
+		if (rstack[rstackcnt] != (cell)pointer2cell(&trampoline[1])) {
+			del_debug_xt(debug_xt_up->xt_docol);	/* frees the entry debug_xt_up points at */
+			add_debug_xt(findxtfromcell(rstack[rstackcnt]));
+			/* NOTE(review): findxtfromcell() can return 0; not checked here - confirm */
+			fstrncpy(xtname, lfa2nfa(findxtfromcell(rstack[rstackcnt]) - sizeof(cell)), MAXNFALEN);
+			printk("\n[ Up to %s ] ", xtname);
+		} else {
+			fstrncpy(xtname, lfa2nfa(findxtfromcell(debug_xt_up->xt_docol) - sizeof(cell)), MAXNFALEN);
+			printk("\n[ Finished %s (Unable to go up, hit trampoline) ] ", xtname); 
+		}
+
+		debug_xt_up = NULL;
+	}
+
 	PC = POPR();
 }
 
 static inline void next(void)
 {
+	struct debug_xt *entry;
+
 	PC += sizeof(ucell);
 
+	/* Hand control to the source debugger for every marked word whose
+	 * recorded range (xt_docol..xt_semis) contains the new PC. Words in
+	 * "step up" mode are passed over here and dealt with in semis(). */
+	for (entry = debug_xt_list; entry->next; entry = entry->next) {
+		if (entry->mode == DEBUG_MODE_STEPUP)
+			continue;
+
+		if (PC >= entry->xt_docol && PC <= entry->xt_semis)
+			do_source_dbg(entry);
+	}
+
 	dbg_interp_printk("next: PC is now %x\n", PC);
 	processxt(read_ucell(cell2pointer(read_ucell(cell2pointer(PC)))));
 }
@@ -367,3 +644,16 @@
 {
 	string_relay( &encode_file );
 }
+
+/* Primitive behind "(debug": pop an xt and mark it for debugging */
+static void
+do_debug_xt( void )
+{
+	ucell xt = POP();
+
+	/* Only show the key help when the xt was accepted */
+	if (!add_debug_xt(xt))
+		return;
+	printk(DEBUG_BANNER);
+}
+
Index: kernel/bootstrap.c
===================================================================
--- kernel/bootstrap.c	(revision 590)
+++ kernel/bootstrap.c	(working copy)
@@ -78,7 +78,7 @@
 	"here", "here!", "dobranch", "do?branch", "unaligned-w@",
 	"unaligned-w!", "unaligned-l@", "unaligned-l!", "ioc@", "iow@",
 	"iol@", "ioc!", "iow!", "iol!", "i", "j", "call", "sys-debug",
-	"$include", "$encode-file"
+	"$include", "$encode-file", "(debug"
 };
 
 static void init_trampoline(void)
Index: forth/debugging/firmware.fs
===================================================================
--- forth/debugging/firmware.fs	(revision 590)
+++ forth/debugging/firmware.fs	(working copy)
@@ -62,13 +62,18 @@
 
 
 \ 7.5.3.4    Forth source-level debugger
-
+ 
 : debug    ( "old-name< >" -- )
+  parse-word            \ Take the name of the word to debug from the input
+  $find                 \ presumably ( str len -- xt true | str len false )
+  0 = if
+    ." could not locate word for debugging"
+    2drop               \ discard the two cells left on failure
+  else
+    (debug              \ pass the xt to the debugger primitive
+  then
   ;
   
-: (debug    ( xt -- )
-  ;
-  
 : stepping    ( -- )
   ;
   
@@ -79,4 +84,6 @@
   ;
   
 : resume    ( -- )
+  \ Raise terminate? so that the interpreter loop testing it can exit
+  1 to terminate?
   ;
Index: forth/bootstrap/interpreter.fs
===================================================================
--- forth/bootstrap/interpreter.fs	(revision 590)
+++ forth/bootstrap/interpreter.fs	(working copy)
@@ -12,6 +12,7 @@
 \ 
 
 0 value interactive?
+0 value terminate?
 
 : exit?
   interactive? 0= if
@@ -122,7 +123,8 @@
     refill 
 
     ['] interpret catch print-status
-  again
+    terminate?
+  until
 ; ['] outer-interpreter (to)
 
 \ 
Index: include/openbios/stack.h
===================================================================
--- include/openbios/stack.h	(revision 590)
+++ include/openbios/stack.h	(working copy)
@@ -18,6 +18,8 @@
 extern int  rstackcnt;
 extern cell rstack[rstacksize];
 
+extern int dbgrstackcnt;
+
 //typedef struct opaque_xt *xt_t;
 //typedef struct opaque_ihandle *ihandle_t;
 //typedef struct opaque_phandle *phandle_t;
Index: modules/cmdline.c
===================================================================
--- modules/cmdline.c	(revision 590)
+++ modules/cmdline.c	(working copy)
@@ -179,6 +179,7 @@
 {
 	int cur_added=0, histind=0, ch, i, pos=0, n=0, prompt=1;
 	char *buf = ci->buf;
+	int terminate = 0;
 
 	buf = ci->buf;
 	selfword("prepare");
@@ -187,10 +188,11 @@
 #ifdef NOLEAVE
 	for (;;)
 #else
-	while (rstackcnt)
+	while (rstackcnt && !terminate)
 #endif
 	{
 		int drop = 0;
+		terminate = 0;
 
 		if( prompt ) {
 			fword("print-prompt");
@@ -229,6 +231,12 @@
 			emit(' ');
 			PUSH( feval(buf) );
 			fword("print-status");
+
+			/* Leave the interpreter if terminate? value set */
+			fword("terminate?");
+			if (POP())
+				terminate = 1;
+
 			prompt = 1;
 			break;
 
@@ -352,7 +360,14 @@
 			move_cursor( 1-emit_str(&buf[pos++]) );
 		}
 	}
-	/* won't come here; if we ever do we should close ourselves */
+
+	/* we only get here if terminate? is non-zero; this should
+         * only ever be done for a subordinate forth interpreter 
+         * e.g. for debugging */
+
+	/* Reset stack and terminate? */
+	rstackcnt = dbgrstackcnt;
+	feval("0 to terminate?");
 }
 
 NODE_METHODS( cmdline ) = {
