Compiler 56

Download as pdf or txt
Download as pdf or txt
You are on page 1of 39

EX NO: 5 Study of LEX Tool

DATE :

LEX Tool

 Lex is a tool used for generating lexical analyzers, also known as lexers or scanners, for
processing text or source code.
 Lex is often used in combination with the yacc or Bison parser generator to create complete
syntax analysis tools for programming languages and other formal languages.
 Lex, along with yacc (or Bison), has been a fundamental tool in the development of compilers
and interpreters for programming languages.

Structure of a LEX File

%{
Definition Section #include<stdio.h>
%}

Rules Section %%
[0-9]+ {printf("%s is an integer", yytext);}
%%

User Code Section


int main(){
yylex();
return 0;}

Definition Section :

C code
➢ Is used for defining file variables, and for prototypes of routines that are defined in the code
segment.

Definitions
➢ A definition is very much like a #define cpp directive.

State definitions
➢ A state definition looks like %s STATE, and by default a state INITIAL is already given.

Rules Section :
• The rules section has a number of pattern-action pairs.
• The patterns are regular expressions and the actions are either a single C command, or a sequence
enclosed in braces.
• If more than one rule matches the input, the longer match is taken. If two matches are the same
length, the earlier one in the list is taken.

User Code Section :


• If the lex program is to be used on its own, this section will contain a main program.
• If you leave this section empty you will get the default main:
int main() { yylex();
return 0; }
where yylex is the scanner (lexical analyzer) function that is built from the rules.

Lex Regular Expressions:

Lex Predefined Variables:


Function of LEX :

Simple Lex Program :

Compilation and Output :

Properties:

• Regular Expression-Based Tokenization: Lex allows you to define token recognition rules using
regular expressions. This makes it easy to specify complex patterns for identifying tokens within
the input text.
• Automatic Code Generation: Lex generates the lexer code in a target programming language
(usually C or C++). This code can be compiled into an executable program, saving developers
from writing the lexer code manually.
• Customizable Actions: For each regular expression rule, you can specify associated actions,
which determine what happens when a particular pattern is matched. These actions can include
updating variables, recording token values, or performing other custom logic.
• Efficient DFA-Based Matching: Lex uses a deterministic finite automaton (DFA) to perform
efficient pattern matching. This results in fast and optimized tokenization of input text.

Result :

Thus, study of lex tool was done successfully.


Exp No: 06 a
Date

PROGRAM CODE:

%{
/* Counts the positive and negative integers found in the input and
   echoes each one as it is recognised. */
#include <stdio.h>

int positive_no = 0, negative_no = 0;

%}

%%

^[-][0-9]+ {
    /* negative number: the '-' must be the first character on the line */
    negative_no++;
    printf("negative number = %s\n", yytext);
}

[0-9]+ {
    /* positive number */
    positive_no++;
    printf("positive number = %s\n", yytext);
}

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main()
{
    yylex();

    printf("number of positive numbers = %d,"
           "number of negative numbers = %d\n",
           positive_no, negative_no);

    return 0;
}

Output:

RESULT:

Thus, we executed a Lex program for counting the positive and negative numbers.
Exp No: 06 b
Date

PROGRAM CODE:

%{
/* Counts lines, spaces, tabs, words and characters in the input. */
#include <stdio.h>

/* lc = lines, sc = spaces, tc = tabs, ch = characters, wc = words */
int lc = 0, sc = 0, tc = 0, ch = 0, wc = 0;
%}

%%

\n          { lc++; ch += yyleng; }

" "         { sc++; ch += yyleng; }

\t          { tc++; ch += yyleng; }

[^ \t\n]+   { wc++; ch += yyleng; }

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main() {
    printf("Enter the stream of text : ");

    yylex();

    printf("Number of lines : %d\n", lc);
    printf("Number of spaces : %d\n", sc);
    printf("Number of tabs, words, charc : %d , %d , %d\n", tc, wc, ch);

    return 0;
}

Output:

RESULT:

Thus, we executed a Lex program for counting number of words, characters and lines.
Exp No: 06 c
Date:

PROGRAM CODE:

%{
/* Counts vowels and consonants in the input. */
#include <stdio.h>

int vow_count = 0;

int const_count = 0;

%}

%%

[aeiouAEIOU] { vow_count++; }

[a-zA-Z] {
    /* Vowels are caught by the earlier rule (same length, listed first),
       so any letter that reaches this rule is a consonant. */
    const_count++;
}

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main()
{
    printf("Enter the string of vowels and consonants:");

    yylex();

    printf("Number of vowels are: %d\n", vow_count);
    printf("Number of consonants are: %d\n", const_count);

    return 0;
}

Output:

RESULT:

Thus, we executed a Lex program for counting vowels and consonants.


Exp No: 06 d
Date:

PROGRAM CODE:

%{
/* Prints "Even" or "Odd" for every unsigned integer in the input. */
#include <stdio.h>

%}

%%

[0-9]+ {
    /* Parity depends only on the last digit, so arbitrarily long digit
       strings are classified correctly without converting them to int. */
    if ((yytext[yyleng - 1] - '0') % 2 == 0)
        printf("Even");
    else
        printf("Odd");
}

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main()
{
    yylex();

    return 0;
}

Output:

RESULT:

Thus, we executed a Lex program to decide whether number is odd or even.


Exp No: 06 e
Date:

PROGRAM CODE:

%{
/* Reports whether the input contains an e-mail address of the form
   local@domain.com, local@domain.in or local@domain.education. */
#include <stdio.h>

#include <stdlib.h>

int flag = 0;   /* set to 1 once a matching address has been seen */

%}

%%

[a-z.0-9]+@[a-z]+(".com"|".in"|".education") {
    /* The suffix alternatives are grouped so that a bare ".in" or
       ".education" can no longer match on its own. The pattern is not
       anchored: an address anywhere in the input sets the flag. */
    flag = 1;
}

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main() {
    yylex();

    if (flag == 1)
        printf("Valid \n");
    else
        printf("Not Valid \n");

    return 0;
}

Output:

RESULT:

Thus, we successfully executed a Lex program for email validation.


Exp No: 06 f
Date:

PROGRAM CODE:

%{
/* Validates dates written as DD/MM/YYYY (day 01-31, month 01-12,
   year 1900-2999). Day/month combinations such as 31/02 are not
   rejected by this pattern. */
#include <stdio.h>

%}

%%

((0[1-9])|([1-2][0-9])|(3[0-1]))\/((0[1-9])|(1[0-2]))\/(19[0-9]{2}|2[0-9]{3}) printf("Valid");

.* printf("Invalid");

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main()
{
    yylex();

    return 0;
}
OUTPUT:

RESULT:

Hence, we have successfully executed a Lex program for validation of date.


Exp No: 06 g
Date

PROGRAM CODE:

%{
/* Reports every input line whose first character is 'A' or 'a'. */
#include <stdio.h>
%}

%%
^[Aa].* printf("string starts with 'A' or 'a': %s\n", yytext);
.       ;
\n      ;
%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

int main() {
    yylex();
    return 0;
}

Output:

RESULT:

Thus, we executed a Lex program to determine if string starts with 'a' or not.
Exp No: 06 h
Date:

PROGRAM CODE:

%{
/* Validates a 10-digit mobile number whose first digit is not 0. */
#include <stdio.h>

%}

%option noyywrap

%%

[1-9][0-9]{9} {printf("\nMobile Number Valid\n");}

.+ {
    /* Longest match wins, so any line that is not exactly ten digits
       (for example eleven digits) lands here instead. */
    printf("\nMobile Number Invalid\n");
}

%%

int main()
{
    printf("\nEnter Mobile Number : ");

    yylex();

    printf("\n");

    return 0;
}

OUTPUT:

RESULT:

Hence, we have successfully executed a Lex program for mobile number validation.
Exp No: 06 i
Date

PROGRAM CODE:

%{
/* Splits a C source file (test.c) into tokens and prints the category
   of each token. */
#include <stdio.h>

%}

%%

[0-9]+[.][0-9]+ printf("%s is a floating point number\n",yytext);

int|float|char|double|void printf("%s is a datatype\n",yytext);

[0-9]+ printf("%s is an integer number\n",yytext);

[a-z]+[()] printf("%s is a function\n",yytext);

[a-z]+ printf("%s is an identifier\n",yytext);

[+=*/-] printf("%s is an operator\n",yytext);

; printf("%s is an delimiter\n",yytext);

, printf("%s is a separator\n",yytext);

[#][a-z\.h]+ printf("%s is a preprocessor\n",yytext);

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap(void)
{
    return 1;
}

int main()
{
    /* reads input from a file named test.c rather than terminal */
    if (freopen("test.c", "r", stdin) == NULL) {
        perror("test.c");
        return 1;
    }

    yylex();

    return 0;
}

Test.c:

OUTPUT:

RESULT:

Thus, we successfully executed a Lex program for token separation.


Exp No: 07
Date: STUDY OF YACC TOOL

YACC (Yet Another Compiler Compiler) serves as a powerful grammar parser and generator. In essence, it
functions as a tool that takes in a grammar specification and transforms it into executable code capable of
meticulously structuring input tokens into a coherent syntactic tree, aligning seamlessly with the
prescribed grammar rules.

Stephen C. Johnson developed YACC in compiler design in the early 1970s. Initially, the YACC was written in
the B programming language and was soon rewritten in C. It was originally designed for being
complemented by Lex.

In addition to that, YACC was also rewritten in OCaml, Ratfor, ML, ADA, Pascal, Java, Python, Ruby and Go.

The input of YACC in compiler design is the rule or grammar, and the output is a C program.

Parts of a YACC Program in Compiler Design

The parts of YACC program are divided into three sections:

/* definitions */

....

%%

/* rules */

....

%%

/* auxiliary routines */

....

Definitions: these include the header files and any token information used in the syntax. These are located
at the top of the input file. Here, the tokens are declared with directives that begin with a percent sign (%), such as %token. In the YACC, numbers are
automatically assigned for tokens.

%token ID

%{ #include <stdio.h> %}
Rules: The rules are defined between %% and %%. These rules define the actions for when the token is
scanned and are executed when a token matches the grammar.

Auxiliary Routines: Auxiliary routines contain the function required in the rules section. This Auxiliary
section includes the main() function, where the yyparse() function is always called.

This yyparse() function plays the role of reading the token, performing actions and then returning to the
main() after the execution or in the case of an error.

0 is returned after successful parsing and 1 is returned after an unsuccessful parsing.

The YACC is responsible for converting these sections into subroutines which will examine the inputs. This
process is made to work by a call to a low-level scanner and is named Parsing

Let us now study the working of YACC in compiler design.

Workings of YACC:

YACC in compiler design is set to work in C programming language along with its parser generator.

• An input with a .y extension is given.


• The file is invoked, and 2 files, y.tab.h and y.tab.c, are created. These files contain long
codes implementing the LALR(1) parser for the grammar.
• The generated code then provides yyparse(), which tries to parse a valid sentence successfully.
 For the output files,
• If called with the –d option in the command line, YACC produces y.tab.h with all its
specific definitions.
• If called with the –v option, YACC produces y.output having a textual description of the
LALR(1) parsing table.
 How to Execute
• Save the program code to a file with a .y extension, such as calculator.y.
• Install Yacc/Bison on your system, if it is not already installed.
• Open a terminal or command prompt and navigate to the directory where the .y file is
saved.
• Run the following command to generate a C source file from the Yacc/Bison grammar
file
 yacc -d filename.y
• This will create two output files: y.tab.c (the C source file) and y.tab.h (the header file).
• Compile the generated C source file and link it with the Yacc/Bison runtime library using
a C compiler such as GCC:
 gcc -o calculator y.tab.c -ly
• Run the compiled program.

RESULT:

Hence the study of YACC tool has been done successfully.


Exp No: 08
Date

PROGRAM CODE:

Command: vi pp1.l

pp1.l

%{
/* Scanner for the 0/1 pair grammar: returns the tokens Zero and One,
   and passes every other character through unchanged. */
#include<stdio.h>

#include "y.tab.h"

extern int yylval;

%}

%%

0       { yylval = 0; return Zero; }

1       { yylval = 1; return One; }

[ \t]   return 0;

\n      return 0;

.       return yytext[0];

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() { return 1; }

Yacc Code:

%{
/* Accepts strings made of one or more pairs "00" or "11". */
#include<stdio.h>
#include<stdlib.h>
#include "y.tab.h"
void yyerror(char *s);
int yylex();
%}
%token Zero One
%%
stmt : S
     ;

S    : S A      /* one or more pairs */
     | A
     ;

A    : Zero Zero
     | One One
     ;
%%
int main()
{
    /* yyparse() returns 0 only when the whole input was accepted. */
    if (yyparse() == 0)
        printf("Accepted\t");
    exit(0);
}

/* Called by the parser on a syntax error; reports it and stops. */
void yyerror(char *s)
{
    printf("Not Accepted\n");
    exit(0);
}
OUTPUT:

RESULT:

Hence, we have successfully executed the parser program using YACC


Exp No: 09
Date:

PROGRAM CODE:

calculator.l for the Lex code:

%{
/* Scanner for the calculator: returns NUM for integers and the
   character itself for operators, parentheses and newline. */
#include <stdlib.h>

#include "y.tab.h"

void yyerror(const char *s);
%}

%%

[0-9]+      { yylval = atoi(yytext); return NUM; }

[-+*/()\n]  { return *yytext; }

[ \t]       ;

.           { yyerror("Invalid character"); }

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() {
    return 1;
}

calculator.y for the Yacc code:

%{
/* Integer calculator: evaluates one expression per input line. */
#include <stdio.h>

#include <stdlib.h>

int yylex(void);
void yyerror(const char *s);

%}

%token NUM

%left '+' '-'

%left '*' '/'

%%

calclist: /* empty */
        | calclist '\n'                 /* ignore blank lines */
        | calclist exp '\n' { printf("Result: %d\n", $2); }
        ;

exp: NUM
   | exp '+' exp { $$ = $1 + $3; }
   | exp '-' exp { $$ = $1 - $3; }
   | exp '*' exp { $$ = $1 * $3; }
   | exp '/' exp {
         if ($3 == 0) {
             yyerror("Division by zero");
             exit(1);
         } else {
             $$ = $1 / $3;
         }
     }
   | '(' exp ')' { $$ = $2; }
   ;

%%

int main() {
    yyparse();

    return 0;
}

/* Called by the parser (and the scanner) to report an error message. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}

Compile the program:

lex -o lex.yy.c calculator.l

yacc -d -o y.tab.c calculator.y

gcc lex.yy.c y.tab.c -o calculator -ll -ly

./calculator

OUTPUT:

RESULT:

Thus, we have successfully executed the calculator program using lex and yacc.
Exp No: 10
Date

PROGRAM CODE:

three_address.l:

%{
/* Scanner for the three-address-code generator. */
#include <stdlib.h>
#include <string.h>

#include "y.tab.h"

void yyerror(const char *s);

%}

DIGIT [0-9]

ID [a-zA-Z][a-zA-Z0-9]*
%%

[ \t]   ;

\n      { return '\n'; /* the grammar uses newline to end a statement */ }

"+" { return ADD; }

"-" { return SUB; }

"*" { return MUL; }

"/" { return DIV; }

"=" { return ASSIGN; }

"(" { return LPAREN; }

")" { return RPAREN; }

{DIGIT}+ { yylval.num = atoi(yytext); return NUM; }

{ID} { yylval.id = strdup(yytext); return IDENTIFIER; /* freed by the parser */ }

. { yyerror("Invalid character"); }

%%

/* Returning 1 tells the scanner that there is no more input after EOF. */
int yywrap() {
    return 1;
}

three_address.y:

%{
/* Translates assignment statements into three-address code, one
   temporary (t1, t2, ...) per binary operation. */
#include <stdio.h>

#include <stdlib.h>

#include <string.h>

int yylex();

void yyerror(const char *s);

char *generate_code(char *arg1, char *arg2, char *op);

int temp_count = 1;

%}

%union {
    int num;
    char *id;
}

%token <num> NUM

%token <id> IDENTIFIER

%token ADD SUB MUL DIV ASSIGN LPAREN RPAREN

/* Every expression carries the heap-allocated name of the operand or
   temporary that holds its value. */
%type <id> expr term factor

%left ADD SUB

%left MUL DIV

%%

program: stmt_list
       ;

stmt_list: /* empty */
         | stmt_list statement '\n'
         ;

statement: /* empty */
         | IDENTIFIER ASSIGN expr {
               printf("%s = %s\n", $1, $3);
               free($1);
               free($3);
           }
         ;

expr: expr ADD term { $$ = generate_code($1, $3, "+"); free($1); free($3); }
    | expr SUB term { $$ = generate_code($1, $3, "-"); free($1); free($3); }
    | term { $$ = $1; }
    ;

term: term MUL factor { $$ = generate_code($1, $3, "*"); free($1); free($3); }
    | term DIV factor { $$ = generate_code($1, $3, "/"); free($1); free($3); }
    | factor { $$ = $1; }
    ;

factor: NUM {
            /* Use the token's own value; yytext may already hold the
               lookahead token when this action runs. */
            char buf[32];
            snprintf(buf, sizeof buf, "%d", $1);
            $$ = strdup(buf);
        }
      | IDENTIFIER { $$ = $1; }
      | LPAREN expr RPAREN { $$ = $2; }
      ;

%%

int main() {
    yyparse();

    return 0;
}

/* Called by the parser (and the scanner) to report an error message. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}

/*
 * Emits "tN = arg1 op arg2" for a fresh temporary tN and returns a
 * heap-allocated copy of the temporary's name; the caller frees it.
 * arg1 and arg2 are only read, not freed, by this function.
 */
char* generate_code(char* arg1, char* arg2, char* op) {
    char temp[16];

    snprintf(temp, sizeof temp, "t%d", temp_count++);

    printf("%s = %s %s %s\n", temp, arg1, op, arg2);

    return strdup(temp);
}

commands to generate the Lex and Yacc files and compile the program:

lex -o lex.yy.c three_address.l

yacc -d -o y.tab.c three_address.y

gcc lex.yy.c y.tab.c -o three_address -ll -ly

./three_address

INPUT:

OUTPUT:

RESULT:

Hence the Three Address Code Generation using LEX and YACC was implemented successfully.
Exp No: 11
Date:

PROGRAM CODE:

#include<stdio.h>

#include<string.h>

/* One statement of the form `l = r`. op[] holds the statements as
   entered; pr[] holds the statements that survive optimization. */
struct op
{
    char l;       /* single-character destination name */
    char r[20];   /* right-hand-side expression text */
} op[10], pr[10];

/*
 * Reads up to 10 statements and prints them after three passes:
 * dead-code elimination, common-subexpression elimination and removal
 * of duplicate statements.
 * Returns 0 on success, 1 if the input is malformed or out of range.
 */
int main ()
{
    int a, i, k, j, n, z = 0, m, q;
    char *p, *l;
    char temp, t;
    char *tem;

    printf ("Enter the Number of Values: ");
    if (scanf ("%d", &n) != 1 || n < 1 || n > 10)
    {
        fprintf (stderr, "Number of values must be between 1 and 10\n");
        return 1;
    }

    for (i = 0; i < n; i++)
    {
        printf ("Enter left for item %d: ", i + 1);
        /* space before %c skips the newline left by the previous read */
        if (scanf (" %c", &op[i].l) != 1)
            return 1;

        printf ("Enter right for item %d: ", i + 1);
        /* width keeps the input inside r[20] */
        if (scanf ("%19s", op[i].r) != 1)
            return 1;
    }

    printf ("\n\nIntermediate Code\n");
    for (i = 0; i < n; i++)
    {
        printf ("%c=", op[i].l);
        printf ("%s\n", op[i].r);
    }

    /* Dead-code elimination: keep a statement only if its destination
       appears on some right-hand side. The last statement is always kept. */
    for (i = 0; i < n - 1; i++)
    {
        temp = op[i].l;
        for (j = 0; j < n; j++)
        {
            p = strchr (op[j].r, temp);
            if (p)
            {
                pr[z].l = op[i].l;
                strcpy (pr[z].r, op[i].r);
                z++;
                /* One use is enough; stopping here keeps a single copy
                   and guarantees z never exceeds the size of pr[]. */
                break;
            }
        }
    }
    pr[z].l = op[n - 1].l;
    strcpy (pr[z].r, op[n - 1].r);
    z++;

    printf ("\nAfter Dead Code Elimination\n");
    for (k = 0; k < z; k++)
    {
        printf ("%c \t=", pr[k].l);
        printf ("%s\n", pr[k].r);
    }

    /* Common-subexpression elimination: when a later right-hand side
       occurs inside an earlier one, rename the later destination to the
       earlier one and patch every use of the old name. */
    for (m = 0; m < z; m++)
    {
        tem = pr[m].r;
        for (j = m + 1; j < z; j++)
        {
            p = strstr (tem, pr[j].r);
            if (p)
            {
                t = pr[j].l;
                pr[j].l = pr[m].l;
                for (i = 0; i < z; i++)
                {
                    l = strchr (pr[i].r, t);
                    if (l)
                    {
                        a = l - pr[i].r;
                        printf ("pos: %d\n", a);
                        pr[i].r[a] = pr[m].l;
                    }
                }
            }
        }
    }

    printf ("Eliminate Common Expression\n");
    for (i = 0; i < z; i++)
    {
        printf ("%c\t=", pr[i].l);
        printf ("%s\n", pr[i].r);
    }

    /* Blank out the earlier of any two identical statements. */
    for (i = 0; i < z; i++)
    {
        for (j = i + 1; j < z; j++)
        {
            q = strcmp (pr[i].r, pr[j].r);
            if ((pr[i].l == pr[j].l) && !q)
            {
                pr[i].l = '\0';
            }
        }
    }

    printf ("Optimized Code\n");
    for (i = 0; i < z; i++)
    {
        if (pr[i].l != '\0')
        {
            printf ("%c=", pr[i].l);
            printf ("%s\n", pr[i].r);
        }
    }

    return 0;
}

INPUT:

OUTPUT:

RESULT:

Thus, a C program for code optimization has been developed and executed successfully.
Exp No: 12
Date:

PROGRAM CODE:

#include<stdio.h>

#include<string.h>

/*
 * Reads n three-address statements of the form "x=y+z" (single-character
 * operands, no spaces) and prints a Mov / operation / Mov sequence for
 * each one, using register R<i> for statement i.
 * Returns 0 on success, 1 if the input is malformed or out of range.
 * The DOS-only <conio.h> header and getch() call were dropped so the
 * program builds with any standard C compiler.
 */
int main(void)
{
    int n, i;
    char a[50][50];

    printf("Enter the number of intermediate codes:");
    if (scanf("%d", &n) != 1 || n < 0 || n > 50)
    {
        fprintf(stderr, "Number of codes must be between 0 and 50\n");
        return 1;
    }

    for (i = 0; i < n; i++)
    {
        printf("Enter the 3 address code: %d ", i + 1);
        /* Stored from index 1 so the layout is:
           [1] = destination, [2] = '=', [3] = operand 1,
           [4] = operator,    [5] = operand 2. */
        if (scanf("%48s", &a[i][1]) != 1 || strlen(&a[i][1]) < 5)
        {
            fprintf(stderr, "Expected a statement such as a=b+c\n");
            return 1;
        }
    }

    printf("The generated code is: ");
    for (i = 0; i < n; i++)
    {
        printf("\nMov %c,R%d", a[i][3], i);

        if (a[i][4] == '-')
        {
            printf("\nSub %c,R%d", a[i][5], i);
        }
        if (a[i][4] == '+')
        {
            printf("\nAdd %c,R%d", a[i][5], i);
        }
        if (a[i][4] == '*')
        {
            printf("\nMul %c,R%d", a[i][5], i);
        }
        if (a[i][4] == '/')
        {
            printf("\nDiv %c,R%d", a[i][5], i);
        }

        printf("\nMov R%d, %c", i, a[i][1]);
        printf("\n");
    }

    return 0;
}

INPUT:

OUTPUT:

RESULT:

Thus, a C program for code generation has been developed and executed successfully

You might also like