Saturday, May 11, 2013

gdb -tui with syntax highlighting

A little side project.
I discovered gdb's TUI mode and thought it would be even better with syntax highlighting.
I came up with the following simple (line-based) solution:
--- gdb/tui/tui.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui.c 2013-05-10 13:47:36 +0200
@@ -58,6 +58,10 @@
 /* Tells whether the TUI is active or not.  */
 int tui_active = 0;
 static int tui_finish_init = 1;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+int tui_can_syntax_highlight = 0;  /* Set to 1 once the color pairs below have been initialized.  */
+int tui_color_tab[5];  /* curses attributes for the five highlight classes; the display code indexes it with (color code - 1).  */
+#endif
 
 enum tui_key_mode tui_current_key_mode = TUI_COMMAND_MODE;
 
@@ -401,6 +405,30 @@
       keypad (TUI_CMD_WIN->generic.handle, TRUE);
       wrefresh (TUI_CMD_WIN->generic.handle);
       tui_finish_init = 0;
+
+#ifdef TUI_SYNTAX_HIGHLIGHT
+      if (has_colors())
+ {
+   start_color();
+
+   init_pair (1,COLOR_MAGENTA,COLOR_BLACK);
+   tui_color_tab[0] = COLOR_PAIR (1) | A_BOLD;
+
+   init_pair (2,COLOR_GREEN,COLOR_BLACK);
+   tui_color_tab[1] = COLOR_PAIR (2) | A_BOLD;
+
+   init_pair (3,COLOR_YELLOW,COLOR_BLACK);
+   tui_color_tab[2] = COLOR_PAIR (3) | A_BOLD;
+
+   init_pair (4,COLOR_BLUE,COLOR_BLACK);
+   tui_color_tab[3] = COLOR_PAIR (4) | A_BOLD;
+
+   init_pair (5,COLOR_CYAN,COLOR_BLACK);
+   tui_color_tab[4] = COLOR_PAIR (5) | A_BOLD;
+
+   tui_can_syntax_highlight = 1;
+ }
+#endif
     }
   else
     {
--- gdb/tui/tui-source.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui-source.c 2013-05-10 13:19:09 +0200
@@ -28,6 +28,7 @@
 #include "symtab.h"
 #include "objfiles.h"
 #include "filenames.h"
+#include "language.h"
 
 #include "tui/tui.h"
 #include "tui/tui-data.h"
@@ -38,6 +39,466 @@
 #include "gdb_string.h"
 #include "gdb_curses.h"
 
+
+#ifdef TUI_SYNTAX_HIGHLIGHT
+extern int tui_can_syntax_highlight;
+
+static const char *syntax_type_c_3[] = {  /* Words colored COL_TYPE for both C and C++; syntax_type_c_N holds the N-letter ones.  */
+  "int",
+  NULL  /* Each list ends with a NULL marker.  */
+};
+static const char *syntax_type_c_4[] = {
+  "auto",
+  "char",
+  "enum",
+  "long",
+  "void",
+  NULL
+};
+static const char *syntax_type_c_5[] = {
+  "const",
+  "float",
+  "short",
+  "union",
+  NULL
+};
+static const char *syntax_type_c_6[] = {
+  "double",
+  "extern",
+  "inline",
+  "signed",
+  "static",
+  "struct",
+  NULL
+};
+static const char *syntax_type_c_7[] = {
+  "typedef",
+  NULL
+};
+static const char *syntax_type_c_8[] = {
+  "unsigned",
+  "register",
+  "restrict",
+  "volatile",
+  NULL
+};
+static const char **syntax_type_c[] = {  /* Indexed by (word length - 1); a NULL entry means no word of that length.  */
+  NULL,
+  NULL,
+  syntax_type_c_3,
+  syntax_type_c_4,
+  syntax_type_c_5,
+  syntax_type_c_6,
+  syntax_type_c_7,
+  syntax_type_c_8,  /* Last entry: lookups pass 8 as the maximum word length.  */
+};
+
+static const char *syntax_type_cpp_4[] = {  /* Extra words colored COL_TYPE only when the language is C++; syntax_type_cpp_N holds the N-letter ones.  */
+  "bool",
+  NULL  /* Each list ends with a NULL marker.  */
+};
+static const char *syntax_type_cpp_5[] = {
+  "class",
+  NULL
+};
+static const char *syntax_type_cpp_6[] = {
+  "export",
+  NULL
+};
+static const char *syntax_type_cpp_7[] = {
+  "mutable",
+  "virtual",
+  NULL
+};
+static const char *syntax_type_cpp_8[] = {
+  "explicit",
+  "template",
+  "typename",
+  NULL
+};
+static const char *syntax_type_cpp_9[] = {
+  "namespace",
+  NULL
+};
+static const char **syntax_type_cpp[] = {  /* Indexed by (word length - 1); a NULL entry means no word of that length.  */
+  NULL,
+  NULL,
+  NULL,
+  syntax_type_cpp_4,
+  syntax_type_cpp_5,
+  syntax_type_cpp_6,
+  syntax_type_cpp_7,
+  syntax_type_cpp_8,
+  syntax_type_cpp_9,  /* Last entry: lookups pass 9 as the maximum word length.  */
+};
+
+static const char *syntax_keyword_c_2[] = {  /* Words colored COL_KEYWORD for both C and C++; syntax_keyword_c_N holds the N-letter ones.  */
+  "do",
+  "if",
+  NULL  /* Each list ends with a NULL marker.  */
+};
+static const char *syntax_keyword_c_3[] = {
+  "asm",
+  "for",
+  NULL
+};
+static const char *syntax_keyword_c_4[] = {
+  "case",
+  "else",
+  "goto",
+  NULL
+};
+static const char *syntax_keyword_c_5[] = {
+  "break",
+  "while",
+  NULL
+};
+static const char *syntax_keyword_c_6[] = {
+  "return",
+  "sizeof",
+  "switch",
+  NULL
+};
+static const char *syntax_keyword_c_7[] = {
+  "default",
+  NULL
+};
+static const char *syntax_keyword_c_8[] = {
+  "continue",
+  NULL
+};
+static const char **syntax_keyword_c[] = {  /* Indexed by (word length - 1); a NULL entry means no word of that length.  */
+  NULL,
+  syntax_keyword_c_2,
+  syntax_keyword_c_3,
+  syntax_keyword_c_4,
+  syntax_keyword_c_5,
+  syntax_keyword_c_6,
+  syntax_keyword_c_7,
+  syntax_keyword_c_8,  /* Last entry: lookups pass 8 as the maximum word length.  */
+};
+
+static const char *syntax_keyword_cpp_3[] = {  /* Extra words colored COL_KEYWORD only when the language is C++; syntax_keyword_cpp_N holds the N-letter ones.  */
+  "new",
+  "try",
+  NULL  /* Each list ends with a NULL marker.  */
+};
+static const char *syntax_keyword_cpp_4[] = {
+  "this",
+  NULL
+};
+static const char *syntax_keyword_cpp_5[] = {
+  "catch",
+  "throw",
+  "using",
+  NULL
+};
+static const char *syntax_keyword_cpp_6[] = {
+  "delete",
+  "friend",
+  "public",
+  "typeid",
+  NULL
+};
+static const char *syntax_keyword_cpp_7[] = {
+  "private",
+  NULL
+};
+static const char *syntax_keyword_cpp_8[] = {
+  "operator",
+  NULL
+};
+static const char *syntax_keyword_cpp_9[] = {
+  "protected",
+  NULL
+};
+static const char *syntax_keyword_cpp_10[] = {
+  "const_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_11[] = {
+  "static_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_12[] = {
+  "dynamic_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_16[] = {
+  "reinterpret_cast",
+  NULL
+};
+static const char **syntax_keyword_cpp[] = {  /* Indexed by (word length - 1); a NULL entry means no word of that length.  */
+  NULL,
+  NULL,
+  syntax_keyword_cpp_3,
+  syntax_keyword_cpp_4,
+  syntax_keyword_cpp_5,
+  syntax_keyword_cpp_6,
+  syntax_keyword_cpp_7,
+  syntax_keyword_cpp_8,
+  syntax_keyword_cpp_9,
+  syntax_keyword_cpp_10,
+  syntax_keyword_cpp_11,
+  syntax_keyword_cpp_12,
+  NULL,  /* No 13-, 14- or 15-letter words; the slots keep the index equal to (length - 1).  */
+  NULL,
+  NULL,
+  syntax_keyword_cpp_16,  /* Last entry: lookups pass 16 as the maximum word length.  */
+};
+
+/* Preprocessor directive names, matched against the first word after '#'
+   and colored COL_PREPROC.  syntax_preproc_N lists the N-letter
+   directives; syntax_preproc[N - 1] points at that list, or is NULL when
+   no directive has N letters.  */
+static const char *syntax_preproc_2[] = {
+  "if",
+  NULL
+};
+static const char *syntax_preproc_4[] = {
+  "elif", "else", "line",
+  NULL
+};
+static const char *syntax_preproc_5[] = {
+  "endif", "error", "ifdef", "undef",
+  NULL
+};
+static const char *syntax_preproc_6[] = {
+  "define", "ifndef", "pragma",
+  NULL
+};
+static const char *syntax_preproc_7[] = {
+  "include",
+  "warning",  /* GNU extension, standard since C23.  */
+  NULL
+};
+/* Seven entries: lookups pass 7 as the maximum word length.  */
+static const char **syntax_preproc[] = {
+  NULL,
+  syntax_preproc_2,
+  NULL,
+  syntax_preproc_4,
+  syntax_preproc_5,
+  syntax_preproc_6,
+  syntax_preproc_7,
+};
+
+static const char *syntax_literal_4[] = {  /* Words colored COL_LITERAL for both C and C++; syntax_literal_N holds the N-letter ones.  */
+  "NULL",
+  "true",
+  NULL  /* Each list ends with a NULL marker.  */
+};
+static const char *syntax_literal_5[] = {
+  "false",
+  NULL
+};
+static const char **syntax_literal[] = {  /* Indexed by (word length - 1); a NULL entry means no word of that length.  */
+  NULL,
+  NULL,
+  NULL,
+  syntax_literal_4,
+  syntax_literal_5,  /* Last entry: lookups pass 5 as the maximum word length.  */
+};
+
+#define SYNTAX_HIGHLIGHT_EXTRA 16
+
+/* Look WORD (WORD_LEN chars, not NUL-terminated) up in KEYWORDS, a table
+   whose entry [len - 1] lists the keywords of length LEN (NULL if none)
+   and which has MAXLEN entries.  On a match fill COLOR_WORD[0..WORD_LEN)
+   with COLOR and return 1; otherwise return 0 and leave it untouched.  */
+static int tui_keyword_highlight (const char *word,
+      char *color_word,
+      int word_len,
+      char color,
+      const char ***keywords,
+      int maxlen)
+{
+  const char **kw;
+
+  /* Lengths outside 1..MAXLEN would index outside the table.  */
+  if (word_len < 1 || word_len > maxlen)
+    return 0;
+
+  /* Every candidate in this bucket is exactly WORD_LEN characters long,
+     so comparing WORD_LEN bytes is a full-word comparison.  */
+  for (kw = keywords[word_len - 1]; kw && *kw; kw++)
+    if (!memcmp (word, *kw, word_len))
+      {
+        memset (color_word, color, word_len);
+        return 1;
+      }
+  return 0;
+}
+
+enum  /* Per-character color codes stored in the color buffer that parallels a source line.  */
+{
+  COL_NORMAL,  /* 0: not highlighted; matches the zero fill the caller applies to the color buffer.  */
+  COL_LITERAL,  /* Numbers, string and character constants, and the syntax_literal words.  */
+  COL_TYPE,  /* Words from syntax_type_c / syntax_type_cpp.  */
+  COL_KEYWORD,  /* Words from syntax_keyword_c / syntax_keyword_cpp.  */
+  COL_PREPROC,  /* The leading '#' and the directive name that follows it.  */
+  COL_COMMENT,  /* Line comments and block comments.  */
+};
+/* Scan one source line and record a COL_* color code for every character that should be highlighted.  */
+static void
+tui_syntax_highlight (enum language lang,  /* Only language_c and language_cplus are highlighted.  */
+        const char *src_line,  /* NUL-terminated text to scan.  */
+        char *col_line)  /* Parallel color buffer; only highlighted positions are written, the rest keep their prior value.  */
+{
+  int preproc = 0;  /* 0: only spaces seen so far; 2: line started with '#' and no other token has been seen; 1: anything else.  */
+  /* Any other language leaves COL_LINE untouched.  */
+  if( lang != language_c && lang != language_cplus ) return;
+  /* SRC_LINE and COL_LINE advance in lockstep; each branch leaves them on the last character it consumed.  */
+  while (src_line[0])
+  {
+    char c = src_line[0];
+    /* The first non-space character decides whether this is a preprocessor line.  */
+    if (!preproc && c != ' ')
+      {
+ if (c == '#')
+   {
+     preproc = 2;
+     col_line[0] = COL_PREPROC;
+   }
+ else
+   preproc = 1;
+      }
+    /* Number: a digit followed by any run of digits, letters and '_' (a '.' ends the run).  */
+    if (c >= '0' && c <= '9')
+      {
+ col_line[0] = COL_LITERAL;
+
+ c = src_line[1];
+ while ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') || c == '_')
+   {
+     src_line++;
+     col_line++;
+     c = src_line[1];
+
+     col_line[0] = COL_LITERAL;
+   }
+
+ preproc = 1;
+      }
+    else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')  /* Identifier or keyword.  */
+      {
+ const char *word_start = src_line;
+ char *col_start = col_line;
+ int word_len;
+ /* Extend over the rest of the word; the pointers stop on its last character.  */
+ c = src_line[1];
+ while ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') || c == '_')
+   {
+     src_line++;
+     col_line++;
+     c = src_line[1];
+   }
+
+ word_len = src_line - word_start + 1;
+ /* The first table that matches wins; the do/while (0) only gives break a target.  */
+ do
+   {
+     if (preproc==2 &&  /* Directive names are tried only for the first word after '#'.  */
+  tui_keyword_highlight (word_start, col_start, word_len, COL_PREPROC, syntax_preproc, 7))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_TYPE, syntax_type_c, 8))
+       break;
+     if (lang==language_cplus &&
+  tui_keyword_highlight (word_start, col_start, word_len, COL_TYPE, syntax_type_cpp, 9))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_KEYWORD, syntax_keyword_c, 8))
+       break;
+     if (lang==language_cplus &&
+  tui_keyword_highlight (word_start, col_start, word_len, COL_KEYWORD, syntax_keyword_cpp, 16))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_LITERAL, syntax_literal, 5))
+       break;
+   }
+ while (0);
+
+ preproc = 1;
+      }
+    else if (c == '"')  /* String literal.  */
+      {
+ const char *string_start = src_line;
+ char *col_start = col_line;
+ /* Advance to the closing quote, skipping the character after each backslash; an unterminated string runs to the end of the line.  */
+ while (src_line[1])
+   {
+     src_line++;
+     col_line++;
+     c = src_line[0];
+
+     if (c == '\\' && src_line[1])
+       {
+  src_line++;
+  col_line++;
+  continue;
+       }
+
+     if (c == '"')
+       break;
+   }
+ /* Color everything from the opening quote through the current position.  */
+ memset (col_start, COL_LITERAL, (src_line - string_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c == '\'')  /* Character constant.  */
+      {
+ const char *char_start = src_line;
+ char *col_start = col_line;
+ /* Search for the closing quote after the first (possibly backslash-escaped) character.  */
+ if (char_start[1] == '\\' && char_start[2] )
+   src_line = strchr (char_start + 3, '\'');
+ else if (char_start[1])
+   src_line = strchr (char_start + 2, '\'');
+ else
+   src_line = NULL;
+ if (!src_line)  /* No closing quote: treat the rest of the line as the constant.  */
+   src_line = char_start + (strlen (char_start) - 1);
+
+ col_line += src_line - char_start;
+
+ memset (col_start, COL_LITERAL, (src_line - char_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c == '/' && (src_line[1] == '/' || src_line[1] == '*'))  /* Comment.  */
+      {
+ const char *comment_start = src_line;
+ char *col_start = col_line;
+ /* A line comment, or a block comment with no terminator on this line, extends to the end of the line; no state is kept for following lines.  */
+ src_line = comment_start[1]=='*' ?
+   strstr (comment_start + 2, "*/") : NULL;
+ if (!src_line)
+   src_line = comment_start + (strlen (comment_start) - 1);
+ else
+   src_line++;  /* Step from the '*' to the closing '/' of the terminator.  */
+
+ col_line += src_line - comment_start;
+
+ memset (col_start, COL_COMMENT, (src_line - comment_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c != ' ' && c != '#')  /* Any other non-space character ends the window for a directive name.  */
+      {
+ preproc = 1;
+      }
+    /* Step past the last character consumed above.  */
+    src_line++;
+    col_line++;
+  }
+}
+#endif
+
+
 /* Function to display source in the source window.  */
 enum tui_status
 tui_set_source_content (struct symtab *s, 
@@ -51,6 +512,9 @@
       FILE *stream;
       int i, desc, c, line_width, nlines;
       char *src_line = 0;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+      char *col_line = 0;
+#endif
 
       if ((ret = tui_alloc_source_buffer (TUI_SRC_WIN)) == TUI_SUCCESS)
  {
@@ -98,6 +562,9 @@
                   struct tui_source_info *src
       = &TUI_SRC_WIN->detail.source_info;
     const char *s_filename = symtab_to_filename_for_display (s);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    enum language lang;
+#endif
 
                   if (TUI_SRC_WIN->generic.title)
                     xfree (TUI_SRC_WIN->generic.title);
@@ -106,19 +573,35 @@
     xfree (src->fullname);
     src->fullname = xstrdup (symtab_to_fullname (s));
 
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    lang = deduce_language_from_filename (src->fullname);
+    if (lang == language_unknown)
+      lang = get_frame_language ();
+    if (lang == language_unknown)
+      lang = language_cplus;
+#endif
+
     /* Determine the threshold for the length of the
                      line and the offset to start the display.  */
     offset = src->horizontal_offset;
     threshold = (line_width - 1) + offset;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    if (tui_can_syntax_highlight)
+      threshold += SYNTAX_HIGHLIGHT_EXTRA;
+#endif
     stream = fdopen (desc, FOPEN_RT);
     clearerr (stream);
     cur_line = 0;
     src->gdbarch = get_objfile_arch (s->objfile);
     src->start_line_or_addr.loa = LOA_LINE;
     cur_line_no = src->start_line_or_addr.u.line_no = line_no;
-    if (offset > 0)
-      src_line = (char *) xmalloc (
+    src_line = (char *) xmalloc (
+      (threshold + 1) * sizeof (char));
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    if (tui_can_syntax_highlight)
+      col_line = (char *) xmalloc (
         (threshold + 1) * sizeof (char));
+#endif
     while (cur_line < nlines)
       {
         struct tui_win_element *element
@@ -128,10 +611,11 @@
         /* Get the first character in the line.  */
         c = fgetc (stream);
 
-        if (offset == 0)
-   src_line = ((struct tui_win_element *)
-       TUI_SRC_WIN->generic.content[
-     cur_line])->which_element.source.line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+        if (tui_can_syntax_highlight)
+   memset( col_line,0,threshold+1 );
+#endif
+
         /* Init the line with the line number.  */
         sprintf (src_line, "%-6d", cur_line_no);
         cur_len = strlen (src_line);
@@ -222,9 +706,27 @@
         /* Now copy the line taking the offset into
     account.  */
         if (strlen (src_line) > offset)
-   strcpy (((struct tui_win_element *)
-     TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line,
-    &src_line[offset]);
+   {
+#ifdef TUI_SYNTAX_HIGHLIGHT
+     if (tui_can_syntax_highlight)
+       {
+         tui_syntax_highlight (lang,
+      src_line + cur_len,
+      col_line + cur_len);
+
+         src_line[threshold-SYNTAX_HIGHLIGHT_EXTRA] = 0;
+
+         memcpy (((struct tui_win_element *)
+          TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line +
+          line_width,
+          &col_line[offset], strlen(&src_line[offset]));
+       }
+#endif
+
+     strcpy (((struct tui_win_element *)
+       TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line,
+      &src_line[offset]);
+   }
         else
    ((struct tui_win_element *)
     TUI_SRC_WIN->generic.content[
@@ -232,8 +734,10 @@
         cur_line++;
         cur_line_no++;
       }
-    if (offset > 0)
-      xfree (src_line);
+    xfree (src_line);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    xfree (col_line);
+#endif
     fclose (stream);
     TUI_SRC_WIN->generic.content_size = nlines;
     ret = TUI_SUCCESS;
--- gdb/tui/tui-winsource.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui-winsource.c 2013-05-10 13:23:29 +0200
@@ -272,19 +272,72 @@
 }
 
 
+#ifdef TUI_SYNTAX_HIGHLIGHT
+extern int tui_can_syntax_highlight;
+extern int tui_color_tab[5];
+#endif
+
 /* Redraw the complete line of a source or disassembly window.  */
 static void
 tui_show_source_line (struct tui_win_info *win_info, int lineno)
 {
   struct tui_win_element *line;
   int x, y;
+  char *src_line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  char *col_line;
+#endif
 
   line = (struct tui_win_element *) win_info->generic.content[lineno - 1];
+  src_line = line->which_element.source.line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  col_line = (char *)NULL;
+#endif
+
   if (line->which_element.source.is_exec_point)
     wattron (win_info->generic.handle, A_STANDOUT);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  else if (win_info == TUI_SRC_WIN && tui_can_syntax_highlight)
+    col_line = src_line + (win_info->generic.width - 1);
+
+  if (col_line)
+    {
+      int len = strlen (src_line);
+      char cur_col = 0;
+
+      wmove (win_info->generic.handle, lineno, 1);
+
+      while (len)
+ {
+   int output_count = 0;
+
+   while (output_count < len && col_line[output_count] == cur_col)
+     output_count++;
+
+   if (output_count)
+     {
+       waddnstr (win_info->generic.handle, src_line, output_count);
+
+       src_line += output_count;
+       col_line += output_count;
+       len -= output_count;
+     }
+
+   if (cur_col)
+     wattroff (win_info->generic.handle, tui_color_tab[cur_col - 1]);
+
+   cur_col = len ? col_line[0] : 0;
+
+   if (cur_col)
+     wattron (win_info->generic.handle, tui_color_tab[cur_col - 1]);
+ }
+    }
+  else
+#endif
+    {
+      mvwaddstr (win_info->generic.handle, lineno, 1, src_line);
+    }
 
-  mvwaddstr (win_info->generic.handle, lineno, 1,
-             line->which_element.source.line);
   if (line->which_element.source.is_exec_point)
     wattroff (win_info->generic.handle, A_STANDOUT);
 
@@ -614,6 +667,10 @@
 
   max_lines = win_info->generic.height; /* Less the highlight box.  */
   line_width = win_info->generic.width - 1;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  if (win_info == TUI_SRC_WIN)
+    line_width *= 2;
+#endif
   /*
    * Allocate the buffer for the source lines.  Do this only once
    * since they will be re-used for all source displays.  The only
--- gdb/Makefile.in 2013-04-02 19:38:43 +0200
+++ gdb/Makefile.in 2013-05-10 16:47:12 +0200
@@ -264,7 +264,7 @@
 SUBDIR_TUI_DEPS =
 SUBDIR_TUI_LDFLAGS=
 SUBDIR_TUI_CFLAGS= \
- -DTUI=1
+ -DTUI=1 -DTUI_SYNTAX_HIGHLIGHT
 
 #
 # python sub directory definitons

2 comments:

  1. Hi - can you let me know what version of the gdb source code this patch should be applied to?
    Many thanks!

    ReplyDelete
    Replies
    1. I'm not sure (wasn't very smart of me that I didn't add that info here), but from the date I would say any of the 7.6 series should work.
      I have an updated patch for 7.8.2 as well if you want to try it out.

      Delete