Debugging a Bison Grammar





5.00/5 (2 votes)
How to get more information from a bison .output file.
Example Output
Normally the log information for a state looks like this:
state 41 3184 reconfigure_statement: RECONFIGURE . 3185 | RECONFIGURE . WITH OVERRIDE WITH shift, and go to state 610 WITH [reduce using rule 3184 (reconfigure_statement)] $default reduce using rule 3184 (reconfigure_statement)
Using this code the output is:
State 41 3184 reconfigure_statement: RECONFIGURE . 3185 | RECONFIGURE . WITH OVERRIDE WITH shift, and go to state 610 WITH [reduce using rule 3184 (reconfigure_statement)] 2330 func_body_returns_table_6: func_body_returns_table_6 . sql_clause 2343 func_body_returns_scalar_6: func_body_returns_scalar_6 . sql_clause 3466 with_expression: . WITH with_expression_2 common_table_expression with_expression_4 2330 func_body_returns_table_6: func_body_returns_table_6 . sql_clause 2343 func_body_returns_scalar_6: func_body_returns_scalar_6 . sql_clause 3467 with_expression: . WITH blocking_hierarchy with_expression_6 AS '(' select_statement ')'
In this example, we can see that rules 3466 and 3467 cause the shift/reduce error at 3184 as they start with token WITH
and they are called by rules 2330 and 2343 via rule sql_clause
.
Using the code
In order to use the following code you will need both parsertl and lexertl:
http://www.benhanson.net/parsertl.html
http://www.benhanson.net/lexertl.html
//#include "../parsertl17/include/parsertl/debug.hpp"
#include "../parsertl17/include/parsertl/generator.hpp"
#include "../parsertl17/include/parsertl/lookup.hpp"
#include <iostream>
#include "../lexertl17/include/lexertl/memory_file.hpp"
#include "../parsertl17/include/parsertl/parse.hpp"
#include <queue>
using char_ptr_pair = std::pair<const char*, const char*>;
using char_ptr_pair_vector = std::vector<char_ptr_pair>;
using int_char_ptr_map = std::map<int, char_ptr_pair>;
using int_vector = std::vector<int>;
struct non_terminal
{
char_ptr_pair _name;
int _id = -1;
int_vector _lhs;
int_vector _rhs;
non_terminal(const char_ptr_pair &name) :
_name(name)
{
}
};
using nt_vector = std::vector<non_terminal>;
template<typename Container, typename Iterator>
struct direction
{
};
template<typename Container>
struct direction<Container, typename Container::const_iterator>
{
typename Container::const_iterator begin(const Container& c)
{
return c.cbegin();
}
typename Container::const_iterator end(const Container& c)
{
return c.cend();
}
};
template<typename Container>
struct direction<Container, typename Container::const_reverse_iterator>
{
typename Container::const_reverse_iterator begin(const Container& c)
{
return c.crbegin();
}
typename Container::const_reverse_iterator end(const Container& c)
{
return c.crend();
}
};
std::size_t g_rule_name = ~0;
std::size_t g_sub_rule_idx = ~0;
std::size_t g_rule_item1 = ~0;
std::size_t g_rule_item2 = ~0;
std::size_t g_term_name_idx = ~0;
std::size_t g_term_rule_idx = ~0;
std::size_t g_nts_title = ~0;
std::size_t g_non_term_name_idx = ~0;
std::size_t g_non_term_id_idx = ~0;
std::size_t g_left_id_idx = ~0;
std::size_t g_right_id_idx = ~0;
std::size_t g_state_title_idx = ~0;
std::size_t g_state_prod_ix = ~0;
std::size_t g_state_shift_idx = ~0;
std::size_t g_state_reduce_idx = ~0;
std::size_t g_reduce_error_idx = ~0;
// Index from rule number to non terminal
int_vector g_rule_to_nt;
// Production for each rule
std::vector<char_ptr_pair_vector> g_rules;
// Terminals, with rules where they appear
std::vector<std::pair<char_ptr_pair, int_vector>> g_terminals;
// Nonterminals, with rules where they appear
nt_vector g_non_terminals;
void build_grammar(parsertl::state_machine &gsm, lexertl::state_machine &lsm)
{
parsertl::rules grules;
lexertl::rules lrules;
std::string warnings;
grules.token("ACCEPT Char GOTO Grammar Integer Name NL NON_TERMINALS_TITLE "
"OnLeft OnRight REDUCE ReduceReduce RUG_TITLE RUPC_TITLE SHIFT "
"ShiftReduce State STATE_TITLE TERMINALS_TITLE TUG_TITLE UNT_TITLE");
grules.push("start", "useless_nonterms terms_unused rules_useless "
"rules_useless_conflicts conflict_states grammar terminals non_terminals "
"states");
grules.push("useless_nonterms", "%empty | UNT_TITLE NL NL name_list NL NL");
grules.push("terms_unused", "%empty | TUG_TITLE NL NL name_list NL NL");
grules.push("rules_useless", "%empty | RUG_TITLE NL NL production_list NL");
grules.push("rules_useless_conflicts", "%empty | RUPC_TITLE NL NL production_list NL");
grules.push("name_list", "Name NL "
"| name_list Name NL");
grules.push("production_list", "production "
"| production_list production");
grules.push("production",
"opt_nl Integer Name ':' item_list NL or_item_list NL");
grules.push("item_list", "%empty "
"| item_list name_char");
grules.push("or_item_list", "%empty "
"| or_item_list Integer '|' item_list NL");
grules.push("conflict_states", "conflict_state_list NL NL");
grules.push("conflict_state_list", "State conflicts NL "
"| conflict_state_list State conflicts NL");
grules.push("conflicts", "Integer ShiftReduce "
"| Integer ReduceReduce "
"| conflicts ',' Integer ShiftReduce "
"| conflicts ',' Integer ReduceReduce");
grules.push("grammar", "Grammar NL NL gram_production_list NL");
grules.push("gram_production_list", "gram_production "
"| gram_production_list gram_production");
grules.push("gram_production", "Integer rule_name ':' gram_item_list NL sub_prod NL");
g_rule_name = grules.push("rule_name", "Name");
grules.push("gram_item_list", "%empty "
"| gram_item_list rule_item");
grules.push("sub_prod", "%empty "
"| sub_prod sub_rule_id '|' gram_item_list NL");
g_sub_rule_idx = grules.push("sub_rule_id", "Integer");
g_rule_item1 = grules.push("rule_item", "Name");
g_rule_item2 = grules.push("rule_item", "Char");
grules.push("terminals", "TERMINALS_TITLE NL NL terminal_list");
grules.push("terminal_list", "term_name '(' Integer ')' term_integer_list "
"| terminal_list term_name '(' Integer ')' term_integer_list");
g_term_name_idx = grules.push("term_name", "name_char");
grules.push("term_integer_list", "NL "
"| term_rule "
"| term_integer_list term_rule "
"| term_integer_list NL");
g_term_rule_idx = grules.push("term_rule", "Integer");
grules.push("non_terminals", "nts_title NL NL non_terminal_list");
g_nts_title = grules.push("nts_title", "NON_TERMINALS_TITLE");
// e.g: decimal (2971)
grules.push("non_terminal_list", "non_term_name '(' non_term_id ')' NL left_right "
"| non_terminal_list non_term_name '(' non_term_id ')' NL left_right");
g_non_term_name_idx = grules.push("non_term_name", "Name");
g_non_term_id_idx = grules.push("non_term_id", "Integer");
// e.g: on left: 4257, on right: 600 797
grules.push("left_right", "OnLeft left_int_list "
"| OnRight right_int_list "
"| left_right ',' opt_nl OnLeft left_int_list "
"| left_right ',' opt_nl OnRight right_int_list");
grules.push("left_int_list", "NL "
"| left_id "
"| left_int_list left_id "
"| left_int_list NL");
g_left_id_idx = grules.push("left_id", "Integer");
grules.push("right_int_list", "NL "
"| right_id "
"| right_int_list right_id "
"| right_int_list NL");
g_right_id_idx = grules.push("right_id", "Integer");
grules.push("opt_nl", "%empty | NL");
grules.push("states", "state "
"| states state");
grules.push("state", "state_title NL NL state_prod_list transitions");
g_state_title_idx = grules.push("state_title", "STATE_TITLE");
grules.push("state_prod_list", "state_prod "
"| state_prod_list state_prod");
g_state_prod_ix = grules.push("state_prod", "opt_nl state_prod_rule lhs item_dot_list NL");
grules.push("state_prod_rule", "Integer");
grules.push("lhs", "Name ':'");
grules.push("lhs", "'|'");
grules.push("item_dot_list", "%empty "
"| item_dot_list item");
grules.push("item", "name_char");
grules.push("item", "'.'");
grules.push("transitions", "NL "
"| action "
"| transitions action "
"| transitions NL");
grules.push("action", "name_char ACCEPT NL "
"| name_char GOTO Integer NL");
g_state_shift_idx = grules.push("action", "name_char SHIFT Integer NL");
g_state_reduce_idx = grules.push("action", "name_char REDUCE Integer '(' Name ')' NL");
g_reduce_error_idx = grules.push("action", "name_char '[' REDUCE Integer '(' Name ')' ']' NL");
grules.push("integer_list", "NL "
"| Integer "
"| integer_list Integer "
"| integer_list NL");
grules.push("name_char", "Name | Char");
//parsertl::debug::dump(grules, std::cout);
parsertl::generator::build(grules, gsm, &warnings);
lrules.push("[ \t]+|[/][*].{+}[\r\n]*?[*][/]", lrules.skip());
lrules.push("\r?\n", grules.token_id("NL"));
lrules.push("\\d+", grules.token_id("Integer"));
lrules.push(":", grules.token_id("':'"));
lrules.push("\\|", grules.token_id("'|'"));
lrules.push(",", grules.token_id("','"));
lrules.push("[.]", grules.token_id("'.'"));
lrules.push("[(]", grules.token_id("'('"));
lrules.push("[)]", grules.token_id("')'"));
lrules.push("\\[", grules.token_id("'['"));
lrules.push("\\]", grules.token_id("']'"));
lrules.push("'([^'\\\\]|\\\\.)'", grules.token_id("Char"));
lrules.push("accept", grules.token_id("ACCEPT"));
lrules.push("Grammar", grules.token_id("Grammar"));
lrules.push("State \\d+ conflicts: ", grules.token_id("State"));
lrules.push("state \\d+", grules.token_id("STATE_TITLE"));
lrules.push("reduce[/]reduce", grules.token_id("ReduceReduce"));
lrules.push("shift[/]reduce", grules.token_id("ShiftReduce"));
lrules.push("on left:", grules.token_id("OnLeft"));
lrules.push("on right:", grules.token_id("OnRight"));
lrules.push("Nonterminals useless in grammar", grules.token_id("UNT_TITLE"));
lrules.push("Terminals unused in grammar", grules.token_id("TUG_TITLE"));
lrules.push("Rules useless in grammar", grules.token_id("RUG_TITLE"));
lrules.push("Rules useless in parser due to conflicts", grules.token_id("RUPC_TITLE"));
lrules.push("Terminals, with rules where they appear", grules.token_id("TERMINALS_TITLE"));
lrules.push("Nonterminals, with rules where they appear", grules.token_id("NON_TERMINALS_TITLE"));
lrules.push("go to state", grules.token_id("GOTO"));
lrules.push("reduce using rule", grules.token_id("REDUCE"));
lrules.push("shift, and go to state", grules.token_id("SHIFT"));
lrules.push("$?[A-Z_a-z][0-9A-Z_a-z]*", grules.token_id("Name"));
lexertl::generator::build(lrules, lsm);
}
bool nullable(nt_vector::const_iterator &iter)
{
for (const int rule : iter->_lhs)
{
if (g_rules[rule].empty())
return true;
}
return false;
}
bool nullable(const int start_rule)
{
int nt = g_rule_to_nt[start_rule];
auto iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(), [&nt](const auto& rec)
{
return nt == rec._id;
});
return nullable(iter);
}
void output(const char* first, const char* second, std::ostream& ss)
{
for (; first < second; ++first)
{
ss << *first;
}
}
void display_match(const bool prev_matched, const int rule,
nt_vector::const_iterator &nt_iter,
const char_ptr_pair_vector &curr_rhs,
char_ptr_pair_vector::const_iterator& next,
char_ptr_pair_vector::const_iterator& end)
{
if (!prev_matched)
std::cout << '\n';
std::cout << rule << ' ';
output(nt_iter->_name.first, nt_iter->_name.second, std::cout);
std::cout << ':';
for (auto iter = curr_rhs.begin(); iter != next; ++iter)
{
std::cout << ' ';
output(iter->first, iter->second, std::cout);
}
std::cout << " .";
for (auto iter = next; iter != end; ++iter)
{
std::cout << ' ';
output(iter->first, iter->second, std::cout);
}
std::cout << '\n';
}
bool find_match(const std::string& terminal, int_char_ptr_map& lhs_nts,
int_char_ptr_map& rhs_nts)
{
bool matched = false;
const char* first = terminal.c_str();
const char* second = first + terminal.size();
for (const auto& lhs : lhs_nts)
{
const int id = lhs.first;
auto nt_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(),
[id](const auto& nt)
{
return id == nt._id;
});
for (const int rule : nt_iter->_rhs)
{
const auto& curr_rhs = g_rules[rule];
auto iter = curr_rhs.begin();
auto end = curr_rhs.end();
for (; iter != end; ++iter)
{
if (std::equal(nt_iter->_name.first, nt_iter->_name.second,
iter->first, iter->second))
{
auto next = iter;
++next;
if (next != end)
{
if (std::equal(first, second, next->first, next->second))
{
const int id = g_rule_to_nt[rule];
nt_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(),
[id](const auto& nt)
{
return id == nt._id;
});
display_match(matched, rule, nt_iter, curr_rhs, next, end);
// If terminal matched directly, no need to set matched flag
// as there is no additional rule to output.
}
else
{
for (const auto& rhs : rhs_nts)
{
if (std::equal(rhs.second.first,
rhs.second.second, next->first,
next->second))
{
const int id = g_rule_to_nt[rule];
auto nt_iter =
std::find_if(g_non_terminals.begin(),
g_non_terminals.end(),
[id](const auto& nt)
{
return id == nt._id;
});
display_match(matched, rule, nt_iter, curr_rhs,
next, end);
matched = true;
}
}
}
}
}
}
}
}
return matched;
}
// Recursively find all nts beginning/ending with start_nt
template<typename Container, typename Iterator>
void recurse(int start_nt, int_char_ptr_map& nts)
{
direction<Container, Iterator> d;
std::queue<int> queue;
for (queue.push(start_nt); !queue.empty(); queue.pop())
{
int curr_nt = queue.front();
auto nt_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(), [curr_nt](const auto& nt)
{
return curr_nt == nt._id;
});
nts[curr_nt] = nt_iter->_name;
for (const int rule : nt_iter->_rhs)
{
auto& rhs = g_rules[rule];
Iterator iter = d.begin(rhs);
Iterator end = d.end(rhs);
for (; iter != end; ++iter)
{
if (std::equal(nt_iter->_name.first, nt_iter->_name.second,
iter->first, iter->second))
{
curr_nt = g_rule_to_nt[rule];
if (nts.find(curr_nt) == nts.end())
queue.push(curr_nt);
break;
}
else
{
auto curr_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(), [&iter](const auto& nt)
{
return std::equal(iter->first, iter->second,
nt._name.first, nt._name.second);
});
if (curr_iter == g_non_terminals.end() ||
!nullable(curr_iter))
{
break;
}
}
}
}
}
}
void process_clash(const int reduce_rule, const std::string& terminal)
{
int reduce_nt = g_rule_to_nt[reduce_rule];
const char* first = terminal.c_str();
const char* second = first + terminal.size();
auto term_iter = std::find_if(g_terminals.begin(),
g_terminals.end(), [first, second](const auto& term)
{
return std::equal(first, second, term.first.first, term.first.second);
});
int_char_ptr_map reduction_nts;
// Recursively look for reduce_nt at the end of rules
recurse<char_ptr_pair_vector, char_ptr_pair_vector::const_reverse_iterator>
(reduce_nt, reduction_nts);
// Look for rules starting with terminal
for (const int rule : term_iter->second)
{
auto& rhs = g_rules[rule];
auto iter = rhs.begin();
auto end = rhs.end();
for (; iter != end; ++iter)
{
if (std::equal(first, second, iter->first, iter->second))
{
const int shift_nt = g_rule_to_nt[rule];
int_char_ptr_map shift_nts;
// Recursively look for reduce_nt at the end of rules
recurse<char_ptr_pair_vector, char_ptr_pair_vector::const_iterator>
(shift_nt, shift_nts);
if (find_match(terminal, reduction_nts, shift_nts))
{
const int id = g_rule_to_nt[rule];
auto nt_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(),
[id](const auto& nt)
{
return id == nt._id;
});
const auto& rhs = g_rules[rule];
std::cout << rule << ' ';
output(nt_iter->_name.first, nt_iter->_name.second, std::cout);
std::cout << ": .";
for (const auto& item : rhs)
{
std::cout << ' ';
output(item.first, item.second, std::cout);
}
std::cout << '\n';
}
break;
}
else
{
auto curr_iter = std::find_if(g_non_terminals.begin(),
g_non_terminals.end(), [&iter](const auto& nt)
{
return std::equal(iter->first, iter->second,
nt._name.first, nt._name.second);
});
if (curr_iter == g_non_terminals.end() || !nullable(curr_iter))
{
break;
}
}
}
}
}
void process(const char* pathname)
{
parsertl::state_machine gsm;
lexertl::state_machine lsm;
lexertl::memory_file mf(pathname);
build_grammar(gsm, lsm);
lexertl::citerator iter(mf.data(), mf.data() + mf.size(), lsm);
parsertl::match_results results(iter->id, gsm);
using token = parsertl::token<lexertl::citerator>;
token::token_vector productions;
int curr_prod = -1;
// Used in population of g_rule_to_nt
std::vector<std::string> nt_names;
std::vector<char_ptr_pair> state_productions;
std::map<std::string, int> state_shifts;
std::map<std::string, std::pair<int, std::string>> state_reduces;
int state = -1;
for (; results.entry.action != parsertl::action::accept &&
results.entry.action != parsertl::action::error; )
{
switch (results.entry.action)
{
case parsertl::action::reduce:
if (results.entry.param == g_rule_name)
{
// rule_name: Name;
std::string name = results.dollar(0, gsm, productions).str();
auto iter = std::find(nt_names.begin(), nt_names.end(), name);
if (iter == nt_names.end())
{
curr_prod = static_cast<int>(nt_names.size());
nt_names.emplace_back(std::move(name));
}
else
{
curr_prod = static_cast<int>(iter - nt_names.begin());
}
g_rule_to_nt.push_back(curr_prod);
g_rules.emplace_back(char_ptr_pair_vector());
}
else if (results.entry.param == g_sub_rule_idx)
{
// sub_rule_id: Integer;
g_rule_to_nt.push_back(curr_prod);
g_rules.emplace_back(char_ptr_pair_vector());
}
else if (results.entry.param == g_rule_item1 ||
results.entry.param == g_rule_item2)
{
// rule_item: Name | Char;
const token& token = results.dollar(0, gsm, productions);
if (*token.first != '$')
g_rules.back().emplace_back(std::pair(token.first, token.second));
}
else if (results.entry.param == g_term_name_idx)
{
// term_name: name_char;
const token& token = results.dollar(0, gsm, productions);
g_terminals.emplace_back(std::pair(token.first, token.second),
int_vector());
}
else if (results.entry.param == g_term_rule_idx)
{
// term_rule: Integer;
const token& token = results.dollar(0, gsm, productions);
g_terminals.back().second.push_back(atoi(token.first));
}
else if (results.entry.param == g_nts_title)
{
// nts_title: NON_TERMINALS_TITLE;
const int offset = static_cast<int>(g_terminals.size() + 1);
std::vector<std::string> temp;
for (int& val : g_rule_to_nt)
{
val += offset;
}
// Finished with temporary vector
nt_names.swap(temp);
}
else if (results.entry.param == g_non_term_name_idx)
{
// non_term_name: Name;
const token& name = results.dollar(0, gsm, productions);
g_non_terminals.emplace_back(non_terminal(std::pair(name.first, name.second)));
}
else if (results.entry.param == g_non_term_id_idx)
{
// non_term_id: Integer;
const token& id = results.dollar(0, gsm, productions);
g_non_terminals.back()._id = atoi(id.first);
}
else if (results.entry.param == g_left_id_idx)
{
// left_id: Integer;
const token& id = results.dollar(0, gsm, productions);
g_non_terminals.back()._lhs.push_back(atoi(id.first));
}
else if (results.entry.param == g_right_id_idx)
{
// right_id: Integer;
const token& id = results.dollar(0, gsm, productions);
g_non_terminals.back()._rhs.push_back(atoi(id.first));
}
else if (results.entry.param == g_state_title_idx)
{
// state_title: STATE_TITLE;
const token& title = results.dollar(0, gsm, productions);
const char* curr = title.second;
for (; *(curr - 1) != ' '; --curr);
state = atoi(curr);
state_productions.clear();
state_shifts.clear();
state_reduces.clear();
}
else if (results.entry.param == g_state_prod_ix)
{
// state_prod: opt_nl state_prod_rule lhs item_dot_list NL;
state_productions.
emplace_back(char_ptr_pair(results.dollar(1, gsm, productions).first,
results.dollar(3, gsm, productions).second));
}
else if (results.entry.param == g_state_shift_idx)
{
// action: name_char SHIFT Integer NL;
state_shifts[results.dollar(0, gsm, productions).str()] =
atoi(results.dollar(2, gsm, productions).first);
}
else if (results.entry.param == g_state_reduce_idx)
{
// action: name_char REDUCE Integer '(' Name ')' NL;
state_reduces[results.dollar(0, gsm, productions).str()] =
std::pair(atoi(results.dollar(2, gsm, productions).first),
results.dollar(4, gsm, productions).str());
}
else if (results.entry.param == g_reduce_error_idx)
{
// action: name_char '[' REDUCE Integer '(' Name ')' ']' NL;
const std::string terminal = results.dollar(0, gsm, productions).str();
auto shift_iter = state_shifts.find(terminal);
const int rule =
atoi(results.dollar(3, gsm, productions).first);
std::cout << "State " << state << "\n\n";
if (shift_iter == state_shifts.end())
{
// Reduce/Reduce error
auto reduce_iter = state_reduces.find(terminal);
auto& reduce_rule = g_rules[reduce_iter->second.first];
auto& clash_rule = g_rules[rule];
std::cout << "reduce/reduce error(s):\n\n";
std::cout << terminal << ' ';
std::cout << reduce_iter->second.second << ':';
for (auto& item : reduce_rule)
{
std::cout << ' ';
output(item.first, item.second, std::cout);
}
std::cout << " .\n";
std::cout << terminal << ' ';
std::cout << results.dollar(5, gsm, productions).str() << ':';
for (auto& item : clash_rule)
{
std::cout << ' ';
output(item.first, item.second, std::cout);
}
std::cout << " .\n\n";
}
else
{
// Shift/Reduce error
for (const auto& pair : state_productions)
{
output(pair.first, pair.second, std::cout);
std::cout << '\n';
}
std::cout << '\n';
std::cout << terminal;
output(results.dollar(0, gsm, productions).second,
results.dollar(1, gsm, productions).first, std::cout);
std::cout << "shift, and go to state " <<
shift_iter->second << '\n';
output(results.dollar(0, gsm, productions).first,
results.dollar(8, gsm, productions).second, std::cout);
process_clash(rule, terminal);
std::cout << '\n';
}
}
break;
}
parsertl::lookup(iter, gsm, results, productions);
}
if (results.entry.action == parsertl::action::error)
throw std::runtime_error("Parse error");
}
int main()
{
try
{
process(R"(E:\Apps\bison-2.4.1\bin\so25301538.output)");
}
catch (const std::exception &e)
{
std::cout << e.what() << '\n';
}
}
History
27/05/2020 First posted.
28/05/2020 Improved the explanation in the example.
30/05/2020 Tried this with the grammar at https://stackoverflow.com/questions/25301538/fix-shift-reduce-conflict-in-bison-grammar and realised the grammar for the bison .output file needed some optional rules.
01/06/2020 Small fix to find_match() in case where terminal is found on rhs directly.
21/01/2023 Updated to latest parsertl interface.
15/02/2024 Updated to use lexertl17 and parsertl17.