Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
heshamelsawaf committed Apr 27, 2018
2 parents 8740441 + 9dee693 commit 52f2094
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 20 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LEXOBJS = machine.o lexer.o tokenizer.o lex.o
LEXGENOBJS = machine.o dfa.o rexplib.o rexparser.o lexgen.o
PARSERGENOBJS = cfg.o parsergen.o cfgparser.o machine.o lexer.o error.o parsetable.o parsetable.pb.o ll1_parser.o sentential_expression.o leftmost_derivation.o
PARSEROBJS = cfg.o parser_main.o machine.o lexer.o error.o parsetable.o parsetable.pb.o ll1_parser.o cfgparser.o sentential_expression.o leftmost_derivation.o
TESTOBJS = cfg.o cfgparser.o test/test_all.o test/test_ll1.o test/test_main.o
TESTOBJS = cfg.o ll1_parser.o machine.o lexer.o parsetable.o error.o parsetable.pb.o sentential_expression.o leftmost_derivation.o cfgparser.o test/test_all.o test/test_ll1.o test/test_main.o
CC = g++
CFLAGS = -O2 --std=c++11 -Wall
DFLAGS = -ggdb
Expand Down Expand Up @@ -41,7 +41,7 @@ $(PARSER): $(PARSEROBJS)


$(TEST): $(TESTOBJS)
$(CC) $(CFLAGS) $(DFLAGS) $(TESTOBJS) -o test_main $(LDFLAGS)
$(CC) $(CFLAGS) $(DFLAGS) $(TESTOBJS) -o test_main $(LDFLAGS) $(PBFLAGS)
echo Target $(TEST) compiled successfully

debug: $(OBJS)
Expand Down
21 changes: 21 additions & 0 deletions cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,30 @@ bool cfg::to_ll1() {
return factored || removed_recursion;
}

/**
 * Debug helper: writes the contents of `set` to std::cout on a single line,
 * formatted as "{a b c }" (every element is followed by one space).
 *
 * Any element equal to EPS is printed as "ε" instead of its stored value.
 * Elements appear in the container's iteration order, which is unspecified
 * for std::unordered_set. The line is terminated with std::endl, so the
 * stream is flushed after each call.
 *
 * @param set  the strings to print (taken by value, as in the original
 *             signature, so any existing declaration keeps matching).
 */
void print_range(std::unordered_set<std::string> set) {
    std::cout << "{";
    // Bind by const reference: the previous by-value loop variable copied
    // every key just to print it.
    for (const std::string &key : set) {
        std::cout << (key == EPS ? "ε" : key) << " ";
    }
    std::cout << "}" << std::endl;
}


// Runs the two set-construction passes over this grammar: build_first,
// then build_follow, each invoked with `this`.
// NOTE(review): presumably these populate the FIRST and FOLLOW sets that
// symbol::get_first()/get_follow() later return, and build_follow relies on
// build_first having run — confirm before reordering the calls.
void cfg::build() {
build_first(this);
build_follow(this);
// Disabled debug dump: when re-enabled, prints the first and follow sets of
// every non-terminal symbol to std::cout using print_range.
// for (std::string sym_str : get_symbols()) {
// auto sym = get_symbol(sym_str);
// if (!sym->is_terminal()) {
// auto first = sym->get_first();
// auto follow = sym->get_follow();
// std::cout << "Symbol:" << sym_str << "\nFirst: ";
// print_range(first);
// std::cout << "Follow: ";
// print_range(follow);
// std::cout << std::endl;
// }
// }
}

std::ostream &operator<<(std::ostream& stream, const cfg::symbol::production &prod) {
Expand Down
2 changes: 1 addition & 1 deletion lan.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# PRIMITIVE_TYPE ::= 'int' | 'float'
# IF ::= 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}'
# WHILE ::= 'while' '(' EXPRESSION ')' '{' STATEMENT '}'
# ASSIGNMENT ::= 'id' '=' EXPRESSION ';'
# ASSIGNMENT ::= 'id' EXPRESSION ';'
# EXPRESSION ::= SIMPLE_EXPRESSION
| SIMPLE_EXPRESSION 'relop' SIMPLE_EXPRESSION
# SIMPLE_EXPRESSION ::= TERM | SIGN TERM | SIMPLE_EXPRESSION 'addop' TERM
Expand Down
25 changes: 16 additions & 9 deletions ll1_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ std::string production_to_string(std::string lhs, std::vector<std::string> rhs)
std::string get_message(std::string cur_symbol, std::string cur_token, error_type error) {
switch (error) {
case MISSING_SYMBOL:
return "Error: Expected \"" + cur_symbol + "\"";
return "Error: Expected \"" + cur_symbol + "\", Found: \"" + cur_token + "\"";
break;
case INVALID_TOKEN:
return "Error: Invalid token or identifier \"" + cur_token + "\"";
Expand Down Expand Up @@ -64,6 +64,7 @@ leftmost_derivation parse::parse_ll1(parsetable &parsetable, machine &mac, std::


int step = 0;
int cur_symbol_idx = 0;
lexer::token cur_token = lex.next_token(input_stream);
while (true) {
std::string cur_symbol = stack.back();
Expand All @@ -80,19 +81,17 @@ leftmost_derivation parse::parse_ll1(parsetable &parsetable, machine &mac, std::
derivations.push_back(std::vector<std::string> ());
productions.push_back("");

int cur_symbol_idx = 0;


if (step > 0) {
for (std::string symbol : derivations[step - 1]) {
for (int i = 0 ; i < cur_symbol_idx ; i++) {
std::string symbol = derivations[step - 1][i];
if (!parsetable.is_nonterm(symbol))
derivations[step].push_back(symbol);
else
break;
}
}

cur_symbol_idx = derivations[step].size();
// cur_symbol_idx = derivations[step].size();

if (prev_is_production) {
productions[step] = std::string(prev_production);
Expand All @@ -115,10 +114,11 @@ leftmost_derivation parse::parse_ll1(parsetable &parsetable, machine &mac, std::
step++;


if (cur_token_class != EOI && !parsetable.is_nonterm(cur_symbol)) {
if (!parsetable.is_nonterm(cur_symbol)) {
if (cur_symbol == cur_token_class) {
stack.pop_back();
substitute = true;
cur_symbol_idx++;
substitute_str = cur_token.get_str();
// std::cout << cur_symbol << ' ' << substitute_str << std::endl;
cur_token = lex.next_token(input_stream);
Expand All @@ -132,6 +132,13 @@ leftmost_derivation parse::parse_ll1(parsetable &parsetable, machine &mac, std::
prev_is_production = true;
// std::cerr << "Error near: \"" << cur_token.get_str() << "\"" << std::endl;
cur_token = lex.next_token(input_stream);
} else if (cur_token_class == EOI) {
errors.push_back(error(cur_token.get_line(),
cur_token.get_col(),
get_message(cur_symbol, cur_token.get_str(), REACHED_EOF)));
prev_production = get_message(cur_symbol, cur_token.get_str(), REACHED_EOF);
prev_is_production = true;
stack.pop_back();
} else {
stack.pop_back();
errors.push_back(error(cur_token.get_line(),
Expand Down Expand Up @@ -160,8 +167,8 @@ leftmost_derivation parse::parse_ll1(parsetable &parsetable, machine &mac, std::
}
break;
case parsetable::entry::SYNC:
prev_production = production_to_string(cur_symbol, entry.productions);
prev_is_production = "SYNC";
prev_production = "SYNC";
prev_is_production = true;
stack.pop_back();
break;
case parsetable::entry::ERROR:
Expand Down
11 changes: 7 additions & 4 deletions parsetable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ parsetable::parsetable(cfg grammar) {
if (grammar.get_symbol(first)->is_eps()) {
has_eps_prod = true;
} else {
if (table[sym][first].state != parsetable::entry::States::ERROR) {
if (table[sym][first].state == parsetable::entry::States::PROD) {
throw std::invalid_argument(
"Grammar is not LL(1). Entry [" + sym + "," + first + "] has duplicate values.");
}
Expand All @@ -48,14 +48,17 @@ parsetable::parsetable(cfg grammar) {
}
}
for (std::string follow : grammar.get_symbol(sym)->get_follow()) {
if (table[sym][follow].state != parsetable::entry::States::ERROR) {
if (table[sym][follow].state == parsetable::entry::States::PROD && has_eps_prod) {
throw std::invalid_argument(
"Grammar is not LL(1). Entry [" + sym + "," + follow + "] has duplicate values.");
}
if (has_eps_prod) {
table[sym][follow].productions.push_back(EPS_STR);
table[sym][follow].state = parsetable::entry::States::PROD;
} else {
if (table[sym][follow].state == parsetable::entry::States::PROD) {
continue;
}
table[sym][follow].state = parsetable::entry::States::SYNC;
}
}
Expand All @@ -67,8 +70,8 @@ std::string parsetable::get_starting_symbol_key() const {
}

parsetable::entry parsetable::get_entry(std::string nonterm, std::string next_input) {
if (!parsetable::table.count(nonterm)) {
throw std::invalid_argument("received invalid nonterminal symbol");
if (!is_nonterm(nonterm)) {
throw std::invalid_argument("received invalid nonterminal symbol: '" + nonterm + "'");
}
if (!parsetable::table[nonterm].count(next_input)) {
cfg::symbol::production p;
Expand Down
16 changes: 12 additions & 4 deletions test/test_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
#include <vector>
#include "../cfgparser.h"
#include "../cfg.h"
#include "../ll1_parser.h"

using namespace std;

TEST(FIRST, ALL1) {
ifstream grammar_ifs;

grammar_ifs.open("lan2.cfg");

grammar_ifs.open("lan2.cfg");
grammar_ifs.open("lan.cfg");

auto s = [&grammar_ifs] {
std::ostringstream ss;
Expand All @@ -32,9 +31,18 @@ TEST(FIRST, ALL1) {
std::cout << "Before:\n\n" << _cfg << std::endl;
_cfg.to_ll1();
std::cout << "=======================\nAfter:\n\n" << _cfg << std::endl;
_cfg.build();

ifstream ttab("m.out");
machine m("");
ttab >> m;

parsetable ptab(_cfg);
std::cout << ptab << std::endl;
std::ifstream input_stream("in.c");
leftmost_derivation d = parse::parse_ll1(ptab, m, input_stream);
std::cout << d << std::endl;

grammar_ifs.close();

grammar_ifs.close();

Expand Down

0 comments on commit 52f2094

Please sign in to comment.