
/*--------------------------------------------------------------------*/
/*--- A gdb-like interactive shell.               vg_interactive.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an extensible x86 protected-mode
   emulator for monitoring program execution on x86-Unixes.

   Copyright (C) 2000-2003 Nicholas Nethercote
      njn25@cam.ac.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/


#include "vg_include.h"


// XXX: catch SIGCONT, reprint prompt?


/*------------------------------------------------------------*/
/*--- Tokenising, parsing commands                         ---*/
/*------------------------------------------------------------*/

#define MAX_TOKENS   10

/* Split 'cmd' in place into whitespace-separated tokens.

   Each token's start address is stored in 'tokens', and the first
   whitespace character following a token is overwritten with '\0'.

   Returns True on success, in which case the slot after the last token
   is set to NULL.  Returns False as soon as MAX_TOKENS tokens have been
   found (so at most MAX_TOKENS-1 tokens are accepted); in that case the
   list is NOT NULL-terminated.

   'n' is currently unused. */
Bool tokenise_cmd(Char* cmd, Char* tokens[MAX_TOKENS], UInt n)
{
   Char* p       = cmd;
   UInt  n_found = 0;

   for (;;) {
      /* Skip leading whitespace; stop if the string ends here. */
      while (VG_(isspace)(*p))
         p++;
      if ('\0' == *p)
         break;

      /* Record where this token starts. */
      tokens[n_found] = p;
      n_found++;
      if (MAX_TOKENS == n_found)
         return False;

      /* Walk to the end of the token.
         NOTE(review): presumably VG_(isspace0) is true for whitespace and
         for '\0' -- confirm against its definition. */
      while (! VG_(isspace0)(*p))
         p++;
      if ('\0' == *p)
         break;

      /* Terminate the token and step past the separator. */
      *p = '\0';
      p++;
   }
   tokens[n_found] = NULL;

   return True;
}

/* Parse a hexadecimal address of the form "0x<digits>".

   On success stores the value in *loc_val and returns True.  Returns
   False (leaving *loc_val untouched) if 'loc' lacks the "0x" prefix,
   has no digits after it, contains a non-hex character, or has more
   digits than fit in a UInt. */
static Bool parse_addr(Char* loc, UInt* loc_val)
{
   /* Two hex digits per byte of the result type. */
   const UInt max_digits = 2 * sizeof(UInt);
   UInt       n_digits;

   if ('0' != loc[0] || 'x' != loc[1])
      return False;
   loc += 2;   // move past "0x"

   for (n_digits = 0; VG_(isxdigit)(loc[n_digits]); n_digits++) { }

   /* Trailing junk after the digits? */
   if ('\0' != loc[n_digits])
      return False;

   /* A bare "0x" is not an address, and too many digits would be
      silently truncated when stored into *loc_val. */
   if (n_digits < 1 || n_digits > max_digits)
      return False;

   *loc_val = VG_(atoll16)(loc);
   return True;
}

/* Parse a register name of the form "%<reg>".

   For a full 32-bit register name (eax, ebx, ecx, edx, ebp, esp, esi,
   edi) stores the corresponding R_* value in *loc_val and returns True.
   For a 16-bit or 8-bit sub-register name prints a "can't handle" message
   and returns False.  Anything else returns False silently. */
static Bool parse_reg(Char* loc, UInt* loc_val)
{
   /* Sub-register names that are recognised but not yet supported. */
   static const Char* const sub_regs[] = {
      "ax", "bx", "cx", "dx", "bp", "sp", "si", "di",
      "ah", "al", "bh", "bl", "ch", "cl", "dh", "dl"
   };
   Char* name;
   UInt  k;

   if ('%' != loc[0])
      return False;
   name = loc + 1;   // move past '%'

   if      (VG_STREQ(name, "eax")) *loc_val = R_EAX;
   else if (VG_STREQ(name, "ebx")) *loc_val = R_EBX;
   else if (VG_STREQ(name, "ecx")) *loc_val = R_ECX;
   else if (VG_STREQ(name, "edx")) *loc_val = R_EDX;
   else if (VG_STREQ(name, "ebp")) *loc_val = R_EBP;
   else if (VG_STREQ(name, "esp")) *loc_val = R_ESP;
   else if (VG_STREQ(name, "esi")) *loc_val = R_ESI;
   else if (VG_STREQ(name, "edi")) *loc_val = R_EDI;
   else {
      for (k = 0; k < sizeof(sub_regs) / sizeof(sub_regs[0]); k++) {
         if (VG_STREQ(name, sub_regs[k])) {
            VG_(printf)("(can't handle sub-regs yet)\n");
            return False;
         }
      }
      return False;
   }

   return True;
}

/* Returns True iff 'loc' is a non-empty string whose first character is
   a letter or '_' and whose remaining characters are all letters, digits
   or '_'. */
static Bool is_valid_identifier(Char* loc)
{
   Char* p;

   /* First character: no digits allowed (also rejects the empty string). */
   if (! (VG_(isalpha)(loc[0]) || '_' == loc[0]))
      return False;

   /* Remaining characters. */
   for (p = loc + 1; '\0' != *p; p++) {
      if (! (VG_(isalnum)(*p) || '_' == *p))
         return False;
   }
   return True;
}

/* Treat 'loc' as a symbol name and look up its address.

   Returns True and stores the address in *addr if 'loc' is a valid
   identifier and the lookup yields a non-zero address.  Returns False
   otherwise; *addr is written whenever the lookup was attempted.

   XXX: should distinguish between the two failure cases (bad syntax
   versus symbol not found). */
static Bool parse_var(Char* loc, Addr* addr)
{
   if (! is_valid_identifier(loc))
      return False;

   *addr = VG_(get_symbol_addr)(loc);

   return (0x0 != *addr) ? True : False;
}

/* Parse a non-negative decimal number consisting only of the digits 0-9.

   On success stores the value in *num and returns True.  Returns False
   (leaving *num untouched) if 'loc' is empty, contains any non-digit
   character, or denotes a value that does not fit in an Int. */
static Bool parse_num(Char* loc, Int* num)
{
   UInt i;
   Long acc = 0;

   /* An empty string is not a number. */
   if ('\0' == loc[0])
      return False;

   for (i = 0; '\0' != loc[i]; i++) {
      if (loc[i] < '0' || loc[i] > '9')
         return False;

      acc = acc * 10 + (loc[i] - '0');

      /* Fail if Long-->Int would truncate.  Checking on every digit keeps
         'acc' small, so the Long accumulator itself can never overflow no
         matter how many digits are supplied. */
      if (acc != (Long)(Int)acc)
         return False;
   }

   *num = (Int)acc;
   return True;
}

// Parse a "filename:line" pair and look up the address of that line.
//
// Returns True and stores the address in *addr if 'loc' contains a ':',
// the text after the first ':' is a number in the range 0..100000, and the
// lookup yields a non-zero address.  Returns False otherwise.
//
// Nb: temporarily modifies loc ("filename:line" --> "filename") around the
// lookup, but always restores it before returning.  Callers retry the same
// string with other parsers on failure, so it must come back intact.
//
// XXX: distinguish the two cases of failure (syntax, missing symbol)
static Bool parse_line(Char* loc, Addr* addr)
{
   UInt i;
   Int  line;

   // Find the first ':'; without one this is not a file:line pair.
   for (i = 0; ':' != loc[i]; i++) {
      if ('\0' == loc[i])
         return False;
   }

   // Validate the line number before touching the string, so that a
   // failure here cannot leave 'loc' truncated.
   if ( ! parse_num(&loc[i+1], &line) || (line < 0 || line > 100000) )
      return False;

   // Split off the filename just for the duration of the lookup...
   loc[i] = '\0';
   *addr = VG_(get_line_addr)(loc, line);

   // ...then put the ':' back to revert the string to its original form.
   loc[i] = ':';

   return (0x0 != *addr) ? True : False;
}

/* Parse a data location.  The forms are tried in this order: register
   name, hex address, symbol name.  Returns LocReg or LocAddr according
   to the form that matched (with the result in *loc_val), or LocError if
   none did. */
LocKind VG_(parse_data_loc)(Char* loc, UInt* loc_val)
{
   if (parse_reg(loc, loc_val))
      return LocReg;

   if (parse_addr(loc, loc_val))
      return LocAddr;

   if (parse_var(loc, loc_val))
      return LocAddr;

   return LocError;
}

/* Parse a code location.  The forms are tried in this order:
   "file:line", hex address, symbol name.  Returns LocAddr (with the
   address in *addr) if one matched, LocError otherwise. */
LocKind VG_(parse_code_loc)(Char* loc, Addr* addr)
{
   if (parse_line(loc, addr))
      return LocAddr;

   if (parse_addr(loc, addr))
      return LocAddr;

   if (parse_var(loc, addr))
      return LocAddr;

   return LocError;
}

/*------------------------------------------------------------*/
/*--- Breakpoints                                          ---*/
/*------------------------------------------------------------*/

/* Capacity of the breakpoint table below. */
#define MAX_BREAKPOINTS   10

/* One user-set breakpoint. */
typedef
  struct {
     Addr  addr;     /* code address the breakpoint is set at */
     Char* desc;     /* heap-allocated copy of the location string the
                        user typed when adding the breakpoint */
  }
  BreakPoint;

/* Breakpoint table.  Entries [0 .. next_breakpoint-1] are in use and
   non-NULL; removal shuffles later entries down so the used entries stay
   contiguous.  Unused slots hold NULL. */
static BreakPoint* breakpoints[MAX_BREAKPOINTS];

/* Number of breakpoints currently set; also the index of the next free
   slot in 'breakpoints'. */
static UInt        next_breakpoint = 0;

/* Look for a breakpoint whose address lies in the inclusive range
   [bb_start, bb_end].  If one is found, point *bkpt_desc at its
   description and return its address; the first match in table order
   wins.  Otherwise return 0x0 and leave *bkpt_desc untouched. */
Addr is_breakpoint(Addr bb_start, UInt bb_end, Char** bkpt_desc)
{
   UInt slot;

   for (slot = 0; slot < MAX_BREAKPOINTS; slot++) {
      BreakPoint* bp = breakpoints[slot];

      /* The first NULL slot marks the end of the used entries. */
      if (NULL == bp)
         break;

      if (bp->addr < bb_start || bp->addr > bb_end)
         continue;

      *bkpt_desc = bp->desc;
      return bp->addr;
   }
   return 0x0;
}

// Check whether a breakpoint falls inside the basic block of 'bb_size'
// bytes starting at 'bb_addr'.  If so, return the breakpoint's address and
// point *bkpt_desc at its description; otherwise return 0x0.  Always
// returns 0x0 unless both VG_(needs).interactive and VG_(clo_interactive)
// are set.
//
// XXX: should really return a vector of breakpoint addresses, otherwise
// can't have multiple breakpoints in the one BB
Addr VG_(need_to_add_breakpoint)(Addr bb_addr, UInt bb_size,
                                 /*out*/Char** bkpt_desc)
{
   UInt bb_last;

   if (! VG_(needs).interactive || ! VG_(clo_interactive))
      return 0x0;

   // Address of the last byte in the block (the range is inclusive).
   bb_last = bb_addr + bb_size - 1;

   return is_breakpoint(bb_addr, bb_last, bkpt_desc);
}

/* Append a breakpoint at 'addr' to the table, storing a private copy of
   'desc'.  Does no bounds checking: the caller must ensure that
   next_breakpoint < MAX_BREAKPOINTS. */
static void do_add_breakpoint(Addr addr, Char* desc)
{
   BreakPoint* bp = VG_(arena_malloc)( VG_AR_CORE, sizeof(BreakPoint) );

   bp->addr = addr;

   /* Copy the description; the caller's string is not retained. */
   bp->desc =
      VG_(arena_malloc)( VG_AR_CORE, sizeof(Char) * (VG_(strlen)(desc)+1) );
   VG_(strcpy)(bp->desc, desc);

   breakpoints[next_breakpoint] = bp;
   next_breakpoint++;
}

/* Handle the "break" command: parse 'bp' as a code location and add a
   breakpoint there.  Prints a message and adds nothing if the location
   cannot be resolved or the breakpoint table is already full. */
static void add_breakpoint(Char* bp)
{
   Addr  addr;

   /* Compare against LocError explicitly rather than relying on it having
      the value zero. */
   if (LocError == VG_(parse_code_loc)(bp, &addr)) {
      VG_(printf)("Location `%s' invalid or not found, breakpoint not added\n",
                  bp);
      return;
   }

   if (next_breakpoint >= MAX_BREAKPOINTS) {
      VG_(printf)("breakpoint limit reached, breakpoint not added\n");
      return;
   }

   VG_(printf)("adding breakpoint %d at %p (%s)\n", next_breakpoint, addr, bp);

   /* NOTE(review): nothing is invalidated here, unlike in
      remove_breakpoint() -- confirm that code at 'addr' which has already
      been translated will still hit the new breakpoint. */
   do_add_breakpoint(addr, bp);
}

/* Handle the "remove" command: delete breakpoint number 'n' (as shown by
   "info"), invalidate the translation containing it, and release its
   storage.  Prints a message and does nothing if 'n' is out of range. */
static void remove_breakpoint(UInt n)
{
   BreakPoint* doomed;

   if (n >= next_breakpoint) {
      VG_(printf)("no such breakpoint\n");
      return;
   }

   doomed = breakpoints[n];
   VG_(printf)("removing breakpoint %d at %p (%s)\n",
               n, doomed->addr, doomed->desc);

   // Invalidate translation containing the breakpoint
   VG_(invalidate_translations)(doomed->addr, 1, True);

   // Remove breakpoint from list, shuffle the rest along
   for ( ; n + 1 < next_breakpoint; n++) {
      breakpoints[n] = breakpoints[n+1];
   }
   breakpoints[n] = NULL;
   next_breakpoint--;

   // Free both the description and the record itself; do_add_breakpoint()
   // allocates them separately.
   VG_(arena_free)( VG_AR_CORE, doomed->desc );
   VG_(arena_free)( VG_AR_CORE, doomed );
}

static void info_breakpoints(void)
{
   UInt i;
   VG_(printf)("Breakpoints: (%d)\n", next_breakpoint);

   if (0 == next_breakpoint) {
      VG_(printf)("  (none)\n");
   } else {
      for (i = 0; i < next_breakpoint; i++)
         VG_(printf)("  %d: %p (%s)\n",
                     i, breakpoints[i]->addr, breakpoints[i]->desc);
   }
}

/* Handle the "where" command: obtain the ExeContext for the thread
   reported by VG_(get_current_or_recent_tid)() and print it with
   VG_(pp_ExeContext). */
static void where(void)
{
   VG_(pp_ExeContext)( 
      VG_(get_ExeContext)( 
         VG_(get_ThreadState)(
             VG_(get_current_or_recent_tid)()
         ) 
      )
   );
}

/* Handle the "help" command: print the core command summary, then the
   skin's own commands via SK_(print_shell_usage), then examples of the
   accepted data and code location syntaxes.

   Keep the command list in step with the dispatch chain in
   VG_(interactive_shell). */
static void help(void)
{
   VG_(printf)(
"core commands:\n"
"  help, h        print this message\n"
"  break <bp>     add breakpoint at <bp>\n"
"  remove <n>     remove breakpoint <n>\n"
"  info           show breakpoints\n"
"  where          print the current execution context\n"
"  continue, c    resume execution from breakpoint\n"
"  print, p <l>   print contents of location <l>\n"
"\n"
"skin commands:\n"
   );
   SK_(print_shell_usage)();
   VG_(printf)(
"\n"
"The following are examples of ways data locations can be specified:\n"
"  %%eax           register\n"
"  0x8048000      absolute data address\n"
"  gvar           global variable name (local variable names not supported)\n"
"\n"
"The following are examples of ways code locations can be specified:\n"
"  main           function name\n"
"  0x8048000      absolute code address\n"
"  foo.c:99       filename:line number pair (somewhat brittle, if a\n"
"                 line isn't recognised try another one nearby)\n"
   );
}

/*------------------------------------------------------------*/
/*--- The shell                                            ---*/
/*------------------------------------------------------------*/

/* Initialise the breakpoint table and drop into the shell before the
   client starts running. */
void VG_(init_interactive_shell)(void)
{
   UInt slot = 0;

   /* Start with an empty breakpoint table. */
   while (slot < MAX_BREAKPOINTS) {
      breakpoints[slot] = NULL;
      slot++;
   }

   // Warn if main() cannot be found.  Nb: no breakpoint is set on main
   // here; either way we enter the shell now and the user decides where
   // to break.
   if (0x0 == VG_(get_symbol_addr)("main")) {
      VG_(message)(Vg_UserMsg, 
             "warning: cannot find main(), I suggest recompiling with -g\n");
   }

   VG_(interactive_shell)(0x0, "(pre-main)");
}

/* Jump target used to get back to the shell prompt after a faulting memory
   access.  Set by __builtin_setjmp in VG_(interactive_shell). */
static jmp_buf prompt_jmpbuf;

/* Signal handler installed by safe_deref() for SIGSEGV and SIGBUS.  It
   does not return to the faulting instruction; instead it jumps straight
   to the point recorded in prompt_jmpbuf.  'signum' is unused. */
static void memory_error_handler(int signum)
{  
   __builtin_longjmp(prompt_jmpbuf, 1);
}

/* Read the UInt stored at address 'a', guarding against invalid addresses.

   SIGSEGV and SIGBUS are temporarily redirected to memory_error_handler.
   If the read succeeds, the previous handlers are reinstated and the value
   is returned.  If the read faults, memory_error_handler jumps to
   prompt_jmpbuf and this function never returns.

   NOTE(review): on the faulting path everything after the dereference is
   skipped, so the saved SIGSEGV/SIGBUS handlers are not restored.  With
   ksa_flags == 0 the faulting signal may also remain blocked after the
   longjmp -- confirm and fix if so. */
static UInt safe_deref(Addr a)
{
   vki_ksigaction action, sigsegv_saved, sigbus_saved;
   UInt           val;
   Int            res;

   /* Install own SIGSEGV, SIGBUS handlers */
   action.ksa_handler  = memory_error_handler;
   action.ksa_flags    = 0;
   action.ksa_restorer = NULL;
   res = VG_(ksigemptyset)( &action.ksa_mask );
   sk_assert(res == 0);

   res = VG_(ksigaction)( VKI_SIGSEGV, &action, &sigsegv_saved );
   sk_assert(res == 0);
   res = VG_(ksigaction)( VKI_SIGBUS,  &action, &sigbus_saved );
   sk_assert(res == 0);

   /* Do the deref; this is the access that may fault */
   val = *(UInt*)a;
   
   /* Restore old SIGSEGV, SIGBUS handlers (only reached if the deref
      did not fault) */
   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
   sk_assert(res == 0);
   res = VG_(ksigaction)( VKI_SIGBUS,  &sigbus_saved, NULL );
   sk_assert(res == 0);

   return val;
}


/* Run the interactive shell.

   Announces the breakpoint that was hit ('bkpt_addr' may be 0x0, in which
   case only 'bkpt_desc' is printed), then repeatedly prompts on stdout,
   reads one command line from stdin, and executes it.  Returns only when
   the user gives the "continue" command.  Commands not recognised by the
   core are offered to the skin via SK_(interpret_shell_cmd).

   A read error terminates the process; end-of-file and over-long input
   lines are treated as fatal (core panic). */
__attribute__((regparm(2)))
void VG_(interactive_shell)(Addr bkpt_addr, Char* bkpt_desc)
{
   #define BUFLEN    1024
   
   Char  buf[BUFLEN+1];          /* +1 leaves room for the terminator */
   Char* tokens[MAX_TOKENS+1];
   Int   n;                      /* signed: VG_(read) reports errors as -1 */

   if (0x0 == bkpt_addr)
      VG_(printf)("Breakpoint at %s\n", bkpt_desc);
   else
      VG_(printf)("Breakpoint at %p (%s)\n", bkpt_addr, bkpt_desc);

   /* Recovery point for bad memory accesses.  If the dereference done by
    * safe_deref() for the "print" command faults, memory_error_handler
    * jumps back to here; we report the problem and fall through to the
    * prompt loop again. */
   if (__builtin_setjmp(prompt_jmpbuf) != 0) {
      VG_(printf)("cannot access memory at that address\n");
   }

   while (True) {
      /* prompt */
      VG_(printf)("%d> ", VG_(getpid)());

      /* Read input;  0 for stdin */
      n = VG_(read)(0, buf, BUFLEN);

      /* Only touch the buffer if something was actually read: indexing
       * with n-1 or n when n <= 0 would write outside 'buf'. */
      if (n > 0) {
         if (buf[n-1] == '\n')
            buf[n-1] = '\0';  /* ended with newline, overwrite with '\0' */
         else
            buf[n]   = '\0';  /* ended with eof, add '\0' */
      }

      if (n < 0) {
         VG_(printf)("error reading from stdin, aborting\n");
         VG_(exit)(1);

      } else if (BUFLEN == n) {
         VG_(core_panic)("command too long! aborting, sorry\n");
         /* XXX: should really:
          *      - not fail
          *      - do an lseek(?) to ignore excess input so it doesn't get
          *        used the next time
          */

      } else if (n == 0) {
         VG_(core_panic)("empty input");

      } else if (!tokenise_cmd(buf, tokens, n)) {
         VG_(printf)("error: too many tokens in command\n");

      } else if (NULL == tokens[0]) {
         /* blank line: do nothing */
      
      } else if ( VG_STREQ(tokens[0], "help") || VG_STREQ(tokens[0], "h") ) {
         if ( NULL == tokens[1] ) 
            help();
         else
            VG_(printf)("usage: %s\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "continue") || VG_STREQ(tokens[0], "c") )
      {
         if (NULL == tokens[1] ) {
            VG_(printf)("continuing...\n");

            /* The only way out of the shell. */
            return;
         } else
            VG_(printf)("usage: %s\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "break") ) {
         if ( NULL != tokens[1] && NULL == tokens[2]) 
            add_breakpoint( tokens[1] );
         else
            VG_(printf)("usage: %s <fn-name>\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "remove") ) {
         Int num;
         
         if ( NULL != tokens[1] && parse_num(tokens[1], &num) &&
              NULL == tokens[2]) 
         {
            /* parse_num only accepts digits, so 'num' is non-negative. */
            remove_breakpoint( num );
         } else
            VG_(printf)("usage: %s <breakpoint>\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "info") ) {
         if ( NULL == tokens[1] )
            info_breakpoints();
         else
            VG_(printf)("usage: %s\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "where") ) {
         if ( NULL == tokens[1] )
            where();
         else
            VG_(printf)("usage: %s\n", tokens[0]);

      } else if ( VG_STREQ(tokens[0], "print") || VG_STREQ(tokens[0], "p") ) {
         Char* loc = tokens[1];
         if ( NULL != loc && NULL == tokens[2]) {
            UInt loc_val;

            switch (VG_(parse_data_loc)(loc, &loc_val)) {
            case LocError:
               VG_(printf)("invalid location: '%s'\n", loc);
               break;

            case LocReg:
               VG_(printf)("%p\n", VG_(get_archreg)(loc_val));
               break;

            case LocAddr:
               /* Print the address first, so it is shown even if the
                * dereference faults and we jump back to the prompt. */
               VG_(printf)("%p: ", loc_val);
               VG_(printf)("%p\n", safe_deref((Addr)loc_val));
               break;

            default:
               VG_(core_panic)("unexpected LocKind");
            }
            
         } else {
            VG_(printf)("usage: %s <location>\n", tokens[0]);
         }

      } else {
         /* Not a core command; give the skin a chance to handle it. */
         if (!SK_(interpret_shell_cmd)(tokens))
            VG_(printf)("Undefined command \"%s\".  Try \"help\"\n", tokens[0]);
      }
   }
}

/*--------------------------------------------------------------------*/
/*--- end                                         vg_interactive.c ---*/
/*--------------------------------------------------------------------*/
