Saturday, May 11, 2013

gdb -tui with syntax highlighting

A little side project.
I discovered gdb's handy TUI mode and thought it would be even better with syntax highlighting.
I came up with the following simple, line-based solution (each source line is highlighted on its own):
--- gdb/tui/tui.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui.c 2013-05-10 13:47:36 +0200
@@ -58,6 +58,10 @@
 /* Tells whether the TUI is active or not.  */
 int tui_active = 0;
 static int tui_finish_init = 1;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+int tui_can_syntax_highlight = 0;
+int tui_color_tab[5];
+#endif
 
 enum tui_key_mode tui_current_key_mode = TUI_COMMAND_MODE;
 
@@ -401,6 +405,30 @@
       keypad (TUI_CMD_WIN->generic.handle, TRUE);
       wrefresh (TUI_CMD_WIN->generic.handle);
       tui_finish_init = 0;
+
+#ifdef TUI_SYNTAX_HIGHLIGHT
+      if (has_colors())
+ {
+   start_color();
+
+   init_pair (1,COLOR_MAGENTA,COLOR_BLACK);
+   tui_color_tab[0] = COLOR_PAIR (1) | A_BOLD;
+
+   init_pair (2,COLOR_GREEN,COLOR_BLACK);
+   tui_color_tab[1] = COLOR_PAIR (2) | A_BOLD;
+
+   init_pair (3,COLOR_YELLOW,COLOR_BLACK);
+   tui_color_tab[2] = COLOR_PAIR (3) | A_BOLD;
+
+   init_pair (4,COLOR_BLUE,COLOR_BLACK);
+   tui_color_tab[3] = COLOR_PAIR (4) | A_BOLD;
+
+   init_pair (5,COLOR_CYAN,COLOR_BLACK);
+   tui_color_tab[4] = COLOR_PAIR (5) | A_BOLD;
+
+   tui_can_syntax_highlight = 1;
+ }
+#endif
     }
   else
     {
--- gdb/tui/tui-source.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui-source.c 2013-05-10 13:19:09 +0200
@@ -28,6 +28,7 @@
 #include "symtab.h"
 #include "objfiles.h"
 #include "filenames.h"
+#include "language.h"
 
 #include "tui/tui.h"
 #include "tui/tui-data.h"
@@ -38,6 +39,466 @@
 #include "gdb_string.h"
 #include "gdb_curses.h"
 
+
+#ifdef TUI_SYNTAX_HIGHLIGHT
+extern int tui_can_syntax_highlight;
+
+static const char *syntax_type_c_3[] = { /* words coloured as COL_TYPE for both C and C++; this list: 3 characters */
+  "int",
+  NULL
+};
+static const char *syntax_type_c_4[] = {
+  "auto",
+  "char",
+  "enum",
+  "long",
+  "void",
+  NULL
+};
+static const char *syntax_type_c_5[] = {
+  "const",
+  "float",
+  "short",
+  "union",
+  NULL
+};
+static const char *syntax_type_c_6[] = {
+  "double",
+  "extern",
+  "inline",
+  "signed",
+  "static",
+  "struct",
+  NULL
+};
+static const char *syntax_type_c_7[] = {
+  "typedef",
+  NULL
+};
+static const char *syntax_type_c_8[] = {
+  "unsigned",
+  "register",
+  "restrict",
+  "volatile",
+  NULL
+};
+static const char **syntax_type_c[] = { /* slot [n - 1] holds the list of n-character words, or NULL if there is none */
+  NULL,
+  NULL,
+  syntax_type_c_3,
+  syntax_type_c_4,
+  syntax_type_c_5,
+  syntax_type_c_6,
+  syntax_type_c_7,
+  syntax_type_c_8,
+};
+
+static const char *syntax_type_cpp_4[] = { /* extra COL_TYPE words, looked up only when the language is C++ */
+  "bool",
+  NULL
+};
+static const char *syntax_type_cpp_5[] = {
+  "class",
+  NULL
+};
+static const char *syntax_type_cpp_6[] = {
+  "export",
+  NULL
+};
+static const char *syntax_type_cpp_7[] = {
+  "mutable",
+  "virtual",
+  NULL
+};
+static const char *syntax_type_cpp_8[] = {
+  "explicit",
+  "template",
+  "typename",
+  NULL
+};
+static const char *syntax_type_cpp_9[] = {
+  "namespace",
+  NULL
+};
+static const char **syntax_type_cpp[] = { /* slot [n - 1] holds the list of n-character words, or NULL if there is none */
+  NULL,
+  NULL,
+  NULL,
+  syntax_type_cpp_4,
+  syntax_type_cpp_5,
+  syntax_type_cpp_6,
+  syntax_type_cpp_7,
+  syntax_type_cpp_8,
+  syntax_type_cpp_9,
+};
+
+static const char *syntax_keyword_c_2[] = { /* words coloured as COL_KEYWORD for both C and C++; this list: 2 characters */
+  "do",
+  "if",
+  NULL
+};
+static const char *syntax_keyword_c_3[] = {
+  "asm",
+  "for",
+  NULL
+};
+static const char *syntax_keyword_c_4[] = {
+  "case",
+  "else",
+  "goto",
+  NULL
+};
+static const char *syntax_keyword_c_5[] = {
+  "break",
+  "while",
+  NULL
+};
+static const char *syntax_keyword_c_6[] = {
+  "return",
+  "sizeof",
+  "switch",
+  NULL
+};
+static const char *syntax_keyword_c_7[] = {
+  "default",
+  NULL
+};
+static const char *syntax_keyword_c_8[] = {
+  "continue",
+  NULL
+};
+static const char **syntax_keyword_c[] = { /* slot [n - 1] holds the list of n-character words, or NULL if there is none */
+  NULL,
+  syntax_keyword_c_2,
+  syntax_keyword_c_3,
+  syntax_keyword_c_4,
+  syntax_keyword_c_5,
+  syntax_keyword_c_6,
+  syntax_keyword_c_7,
+  syntax_keyword_c_8,
+};
+
+static const char *syntax_keyword_cpp_3[] = { /* extra COL_KEYWORD words, looked up only when the language is C++ */
+  "new",
+  "try",
+  NULL
+};
+static const char *syntax_keyword_cpp_4[] = {
+  "this",
+  NULL
+};
+static const char *syntax_keyword_cpp_5[] = {
+  "catch",
+  "throw",
+  "using",
+  NULL
+};
+static const char *syntax_keyword_cpp_6[] = {
+  "delete",
+  "friend",
+  "public",
+  "typeid",
+  NULL
+};
+static const char *syntax_keyword_cpp_7[] = {
+  "private",
+  NULL
+};
+static const char *syntax_keyword_cpp_8[] = {
+  "operator",
+  NULL
+};
+static const char *syntax_keyword_cpp_9[] = {
+  "protected",
+  NULL
+};
+static const char *syntax_keyword_cpp_10[] = {
+  "const_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_11[] = {
+  "static_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_12[] = {
+  "dynamic_cast",
+  NULL
+};
+static const char *syntax_keyword_cpp_16[] = {
+  "reinterpret_cast",
+  NULL
+};
+static const char **syntax_keyword_cpp[] = { /* slot [n - 1] holds the list of n-character words; lengths 13..15 have none */
+  NULL,
+  NULL,
+  syntax_keyword_cpp_3,
+  syntax_keyword_cpp_4,
+  syntax_keyword_cpp_5,
+  syntax_keyword_cpp_6,
+  syntax_keyword_cpp_7,
+  syntax_keyword_cpp_8,
+  syntax_keyword_cpp_9,
+  syntax_keyword_cpp_10,
+  syntax_keyword_cpp_11,
+  syntax_keyword_cpp_12,
+  NULL,
+  NULL,
+  NULL,
+  syntax_keyword_cpp_16,
+};
+
+/* Preprocessor directive names; syntax_preproc[n - 1] lists the ones that are n characters long.  */
+static const char *syntax_preproc_2[] = { "if", NULL };
+static const char *syntax_preproc_4[] = {
+  "elif",
+  "else",
+  "line",
+  NULL
+};
+static const char *syntax_preproc_5[] = {
+  "ifdef",
+  "endif",
+  "error",
+  "undef",
+  NULL
+};
+static const char *syntax_preproc_6[] = {
+  "define",
+  "ifndef",
+  "pragma",
+  NULL
+};
+static const char *syntax_preproc_7[] = {
+  "include",
+  "warning", /* the directive is #warning; a bare "warn" does not exist */
+  NULL
+};
+static const char **syntax_preproc[] = {
+  NULL,
+  syntax_preproc_2,
+  NULL,
+  syntax_preproc_4,
+  syntax_preproc_5,
+  syntax_preproc_6,
+  syntax_preproc_7,
+};
+
+static const char *syntax_literal_4[] = { /* named constants coloured as COL_LITERAL, for both C and C++ */
+  "NULL",
+  "true",
+  NULL
+};
+static const char *syntax_literal_5[] = {
+  "false",
+  NULL
+};
+static const char **syntax_literal[] = { /* slot [n - 1] holds the list of n-character words, or NULL if there is none */
+  NULL,
+  NULL,
+  NULL,
+  syntax_literal_4,
+  syntax_literal_5,
+};
+
+#define SYNTAX_HIGHLIGHT_EXTRA 16 /* added to the line threshold before highlighting; the line is cut back by the same amount afterwards */
+
+/* Colour WORD if it appears in KEYWORDS.  WORD need not be NUL-terminated;
+   only its first WORD_LEN characters are examined.  KEYWORDS[n - 1] is the
+   NULL-terminated list of n-character keywords (or NULL), and MAXLEN is the
+   number of slots.  On a match fill COLOR_WORD with COLOR and return 1.  */
+static int tui_keyword_highlight (const char *word,
+      char *color_word,
+      int word_len,
+      char color,
+      const char ***keywords,
+      int maxlen)
+{
+  const char **kw;
+
+  /* A length below 1 would read keywords[-1]; one above MAXLEN has no slot.  */
+  if (word_len < 1 || word_len > maxlen)
+    return 0;
+
+  /* All words in a slot are WORD_LEN long, so memcmp is an exact match test.  */
+  for (kw = keywords[word_len - 1]; kw != NULL && *kw != NULL; kw++)
+    if (memcmp (word, *kw, word_len) == 0)
+      {
+        memset (color_word, color, word_len);
+        return 1;
+      }
+
+  return 0;
+}
+
+enum /* highlight class stored per character in the colour line */
+{
+  COL_NORMAL, /* 0: the character is drawn without a colour attribute */
+  COL_LITERAL, /* numbers, string and character literals, NULL/true/false */
+  COL_TYPE, /* words from the syntax_type_* tables */
+  COL_KEYWORD, /* words from the syntax_keyword_* tables */
+  COL_PREPROC, /* a leading '#' and the directive name after it */
+  COL_COMMENT, /* comments; non-zero classes select tui_color_tab[class - 1] when drawn */
+};
+/* Store a COL_* class in COL_LINE for each highlighted character of SRC_LINE; other positions are left unchanged.  */
+static void
+tui_syntax_highlight (enum language lang,
+        const char *src_line,
+        char *col_line)
+{
+  int preproc = 0; /* 0: only blanks seen so far; 2: line began with '#', directive name pending; 1: anything else */
+
+  if( lang != language_c && lang != language_cplus ) return; /* only C and C++ are highlighted */
+
+  while (src_line[0])
+  {
+    char c = src_line[0];
+
+    if (!preproc && c != ' ') /* first non-blank character of the line */
+      {
+ if (c == '#')
+   {
+     preproc = 2;
+     col_line[0] = COL_PREPROC;
+   }
+ else
+   preproc = 1;
+      }
+
+    if (c >= '0' && c <= '9') /* number: a digit plus any following letters, digits and '_' */
+      {
+ col_line[0] = COL_LITERAL;
+
+ c = src_line[1];
+ while ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') || c == '_')
+   {
+     src_line++;
+     col_line++;
+     c = src_line[1];
+
+     col_line[0] = COL_LITERAL;
+   }
+
+ preproc = 1;
+      }
+    else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') /* identifier or keyword */
+      {
+ const char *word_start = src_line;
+ char *col_start = col_line;
+ int word_len;
+
+ c = src_line[1];
+ while ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') || c == '_')
+   {
+     src_line++;
+     col_line++;
+     c = src_line[1];
+   }
+
+ word_len = src_line - word_start + 1; /* src_line now points at the last character of the word */
+
+ do /* runs once; the first table that matches wins and the rest are skipped */
+   {
+     if (preproc==2 &&
+  tui_keyword_highlight (word_start, col_start, word_len, COL_PREPROC, syntax_preproc, 7))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_TYPE, syntax_type_c, 8))
+       break;
+     if (lang==language_cplus &&
+  tui_keyword_highlight (word_start, col_start, word_len, COL_TYPE, syntax_type_cpp, 9))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_KEYWORD, syntax_keyword_c, 8))
+       break;
+     if (lang==language_cplus &&
+  tui_keyword_highlight (word_start, col_start, word_len, COL_KEYWORD, syntax_keyword_cpp, 16))
+       break;
+
+     if (tui_keyword_highlight (word_start, col_start, word_len, COL_LITERAL, syntax_literal, 5))
+       break;
+   }
+ while (0);
+
+ preproc = 1;
+      }
+    else if (c == '"') /* string literal: runs to the closing quote or the end of the line */
+      {
+ const char *string_start = src_line;
+ char *col_start = col_line;
+
+ while (src_line[1])
+   {
+     src_line++;
+     col_line++;
+     c = src_line[0];
+
+     if (c == '\\' && src_line[1]) /* step over the escaped character so an escaped quote does not end the string */
+       {
+  src_line++;
+  col_line++;
+  continue;
+       }
+
+     if (c == '"')
+       break;
+   }
+
+ memset (col_start, COL_LITERAL, (src_line - string_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c == '\'') /* character literal */
+      {
+ const char *char_start = src_line;
+ char *col_start = col_line;
+
+ if (char_start[1] == '\\' && char_start[2] ) /* escaped character: search for the closing quote after it */
+   src_line = strchr (char_start + 3, '\'');
+ else if (char_start[1])
+   src_line = strchr (char_start + 2, '\'');
+ else
+   src_line = NULL;
+ if (!src_line) /* no closing quote: colour up to the last character of the line */
+   src_line = char_start + (strlen (char_start) - 1);
+
+ col_line += src_line - char_start;
+
+ memset (col_start, COL_LITERAL, (src_line - char_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c == '/' && (src_line[1] == '/' || src_line[1] == '*')) /* start of a comment */
+      {
+ const char *comment_start = src_line;
+ char *col_start = col_line;
+
+ src_line = comment_start[1]=='*' ? /* only a block comment can end before the end of the line */
+   strstr (comment_start + 2, "*/") : NULL;
+ if (!src_line) /* line comment or unterminated block comment: colour up to the last character */
+   src_line = comment_start + (strlen (comment_start) - 1);
+ else
+   src_line++; /* move from the '*' to the closing '/' */
+
+ col_line += src_line - comment_start;
+
+ memset (col_start, COL_COMMENT, (src_line - comment_start) + 1);
+
+ preproc = 1;
+      }
+    else if (c != ' ' && c != '#') /* any other non-blank character ends the pending-directive state */
+      {
+ preproc = 1;
+      }
+
+    src_line++;
+    col_line++;
+  }
+}
+#endif
+
+
 /* Function to display source in the source window.  */
 enum tui_status
 tui_set_source_content (struct symtab *s, 
@@ -51,6 +512,9 @@
       FILE *stream;
       int i, desc, c, line_width, nlines;
       char *src_line = 0;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+      char *col_line = 0;
+#endif
 
       if ((ret = tui_alloc_source_buffer (TUI_SRC_WIN)) == TUI_SUCCESS)
  {
@@ -98,6 +562,9 @@
                   struct tui_source_info *src
       = &TUI_SRC_WIN->detail.source_info;
     const char *s_filename = symtab_to_filename_for_display (s);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    enum language lang;
+#endif
 
                   if (TUI_SRC_WIN->generic.title)
                     xfree (TUI_SRC_WIN->generic.title);
@@ -106,19 +573,35 @@
     xfree (src->fullname);
     src->fullname = xstrdup (symtab_to_fullname (s));
 
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    lang = deduce_language_from_filename (src->fullname);
+    if (lang == language_unknown)
+      lang = get_frame_language ();
+    if (lang == language_unknown)
+      lang = language_cplus;
+#endif
+
     /* Determine the threshold for the length of the
                      line and the offset to start the display.  */
     offset = src->horizontal_offset;
     threshold = (line_width - 1) + offset;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    if (tui_can_syntax_highlight)
+      threshold += SYNTAX_HIGHLIGHT_EXTRA;
+#endif
     stream = fdopen (desc, FOPEN_RT);
     clearerr (stream);
     cur_line = 0;
     src->gdbarch = get_objfile_arch (s->objfile);
     src->start_line_or_addr.loa = LOA_LINE;
     cur_line_no = src->start_line_or_addr.u.line_no = line_no;
-    if (offset > 0)
-      src_line = (char *) xmalloc (
+    src_line = (char *) xmalloc (
+      (threshold + 1) * sizeof (char));
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    if (tui_can_syntax_highlight)
+      col_line = (char *) xmalloc (
         (threshold + 1) * sizeof (char));
+#endif
     while (cur_line < nlines)
       {
         struct tui_win_element *element
@@ -128,10 +611,11 @@
         /* Get the first character in the line.  */
         c = fgetc (stream);
 
-        if (offset == 0)
-   src_line = ((struct tui_win_element *)
-       TUI_SRC_WIN->generic.content[
-     cur_line])->which_element.source.line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+        if (tui_can_syntax_highlight)
+   memset( col_line,0,threshold+1 );
+#endif
+
         /* Init the line with the line number.  */
         sprintf (src_line, "%-6d", cur_line_no);
         cur_len = strlen (src_line);
@@ -222,9 +706,27 @@
         /* Now copy the line taking the offset into
     account.  */
         if (strlen (src_line) > offset)
-   strcpy (((struct tui_win_element *)
-     TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line,
-    &src_line[offset]);
+   {
+#ifdef TUI_SYNTAX_HIGHLIGHT
+     if (tui_can_syntax_highlight)
+       {
+         tui_syntax_highlight (lang,
+      src_line + cur_len,
+      col_line + cur_len);
+
+         src_line[threshold-SYNTAX_HIGHLIGHT_EXTRA] = 0;
+
+         memcpy (((struct tui_win_element *)
+          TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line +
+          line_width,
+          &col_line[offset], strlen(&src_line[offset]));
+       }
+#endif
+
+     strcpy (((struct tui_win_element *)
+       TUI_SRC_WIN->generic.content[cur_line])->which_element.source.line,
+      &src_line[offset]);
+   }
         else
    ((struct tui_win_element *)
     TUI_SRC_WIN->generic.content[
@@ -232,8 +734,10 @@
         cur_line++;
         cur_line_no++;
       }
-    if (offset > 0)
-      xfree (src_line);
+    xfree (src_line);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+    xfree (col_line);
+#endif
     fclose (stream);
     TUI_SRC_WIN->generic.content_size = nlines;
     ret = TUI_SUCCESS;
--- gdb/tui/tui-winsource.c 2013-02-03 17:16:42 +0100
+++ gdb/tui/tui-winsource.c 2013-05-10 13:23:29 +0200
@@ -272,19 +272,72 @@
 }
 
 
+#ifdef TUI_SYNTAX_HIGHLIGHT
+extern int tui_can_syntax_highlight;
+extern int tui_color_tab[5];
+#endif
+
 /* Redraw the complete line of a source or disassembly window.  */
 static void
 tui_show_source_line (struct tui_win_info *win_info, int lineno)
 {
   struct tui_win_element *line;
   int x, y;
+  char *src_line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  char *col_line;
+#endif
 
   line = (struct tui_win_element *) win_info->generic.content[lineno - 1];
+  src_line = line->which_element.source.line;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  col_line = (char *)NULL;
+#endif
+
   if (line->which_element.source.is_exec_point)
     wattron (win_info->generic.handle, A_STANDOUT);
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  else if (win_info == TUI_SRC_WIN && tui_can_syntax_highlight)
+    col_line = src_line + (win_info->generic.width - 1);
+
+  if (col_line)
+    {
+      int len = strlen (src_line);
+      char cur_col = 0;
+
+      wmove (win_info->generic.handle, lineno, 1);
+
+      while (len)
+ {
+   int output_count = 0;
+
+   while (output_count < len && col_line[output_count] == cur_col)
+     output_count++;
+
+   if (output_count)
+     {
+       waddnstr (win_info->generic.handle, src_line, output_count);
+
+       src_line += output_count;
+       col_line += output_count;
+       len -= output_count;
+     }
+
+   if (cur_col)
+     wattroff (win_info->generic.handle, tui_color_tab[cur_col - 1]);
+
+   cur_col = len ? col_line[0] : 0;
+
+   if (cur_col)
+     wattron (win_info->generic.handle, tui_color_tab[cur_col - 1]);
+ }
+    }
+  else
+#endif
+    {
+      mvwaddstr (win_info->generic.handle, lineno, 1, src_line);
+    }
 
-  mvwaddstr (win_info->generic.handle, lineno, 1,
-             line->which_element.source.line);
   if (line->which_element.source.is_exec_point)
     wattroff (win_info->generic.handle, A_STANDOUT);
 
@@ -614,6 +667,10 @@
 
   max_lines = win_info->generic.height; /* Less the highlight box.  */
   line_width = win_info->generic.width - 1;
+#ifdef TUI_SYNTAX_HIGHLIGHT
+  if (win_info == TUI_SRC_WIN)
+    line_width *= 2;
+#endif
   /*
    * Allocate the buffer for the source lines.  Do this only once
    * since they will be re-used for all source displays.  The only
--- gdb/Makefile.in 2013-04-02 19:38:43 +0200
+++ gdb/Makefile.in 2013-05-10 16:47:12 +0200
@@ -264,7 +264,7 @@
 SUBDIR_TUI_DEPS =
 SUBDIR_TUI_LDFLAGS=
 SUBDIR_TUI_CFLAGS= \
- -DTUI=1
+ -DTUI=1 -DTUI_SYNTAX_HIGHLIGHT
 
 #
 # python sub directory definitons