String Tokenization | C++ Program to Tokenize a Given Program

String Tokenization |

Tokenizing a string in C++

String tokenization is the process of splitting a string into pieces whenever a delimiter is encountered. In the code below, a file containing a simple C++ program is read and tokenized. The program reads each space-separated string from the file and categorizes it as a keyword, bracket, operator, symbol, constant, identifier, and so on. For example, if it reads int, float, cout, etc., it categorizes the token as a keyword; the special character ; is a symbol; (, ), [, ], { and } are brackets; +, -, *, /, etc. are operators; and any other variable name is classified as an identifier. The example program below shows the implementation of string tokenization for a program file, followed by its output.

String Tokenization Program in C++

#include<iostream>
#include<fstream>
#include<string>
#include<cctype>
using namespace std;
// Lookup tables used to classify tokens. Each token is compared against the
// entries of these arrays with an exact string match.

// Words reported as "Keyword" (includes a few standard-library names such as cout and std).
string keywords[] = {"int","float","return","double","cout","using","namespace","std"};
// Tokens reported as "Bracket".
string brackets[] = {"}","{","(",")","[","]"};
// Tokens reported as "Operator".
string operators[] = {"=","+","-","*","/","<<"};
// Tokens reported as "Symbol".
string symbols[] = {";"};
// Reports whether `query` is exactly equal to one of the first `s` entries of `arr`.
// Returns false when `s` is zero or negative, since no entries are examined.
bool isContained(string query,string arr[],int s)
{
    int idx = 0;
    while(idx < s)
    {
        // Exact, case-sensitive string comparison against the current entry.
        if(arr[idx] == query)
        {
            return true;
        }
        ++idx;
    }
    return false;
}
// Reports whether `query` is an unsigned integer constant, i.e. a non-empty
// string made up only of decimal digits.
// An empty string is NOT a constant (previously it was accepted, which caused
// blank input to be reported as "Constant").
bool isConstant(const string& query)
{
    if(query.empty())
        return false;
    for(string::size_type i=0;i<query.size();i++)
    {
        // isdigit requires a value representable as unsigned char; passing a
        // negative plain char is undefined behaviour.
        if(!isdigit(static_cast<unsigned char>(query[i])))
            return false;
    }
    return true;
}
// Reports whether `query` has the shape of a C++ identifier: a non-empty
// string whose first character is a letter or underscore and whose remaining
// characters are letters, digits or underscores.
// The first character is validated explicitly; checking only that it is not a
// digit would accept strings such as "+a" or the empty string.
bool isIdentifier(const string& query)
{
    if(query.empty())
        return false;
    // Cast to unsigned char before calling <cctype> functions: passing a
    // negative plain char is undefined behaviour.
    const unsigned char first = static_cast<unsigned char>(query[0]);
    if(!(isalpha(first) || first == '_'))
        return false;
    for(string::size_type i=1;i<query.size();i++)
    {
        const unsigned char ch = static_cast<unsigned char>(query[i]);
        if(!(isalnum(ch) || ch == '_'))
            return false;
    }
    return true;
}
int main()
{
fstream file;
string line,word="";
bool flag=false;
file.open("Program.cpp",ios::in);
cout<<"Token\t\t\tClass"<<endl;
cout<<"-----\t\t\t-----"<<endl;
while(getline(file,line))
{
    //cout<<line<<endl;
    line.push_back('\0');
    if(line[0] == '#')
        cout<<line<<"\tPreprocessor Directive"<<endl;
    else
    {
        if(line.find("()") != string::npos)
        {
            flag = true;
        }
        for(int i=0;i<line.size();i++)
        {


            if(line[i] == ' ' || line[i] == '\0' || (flag && line[i]=='('))
            {
                if(flag && line[i]=='(')
                {
                    cout<<word<<"\t\t\tFunction"<<endl;
                    cout<<"("<<"\t\t\tBracket"<<endl;
                    flag = false;
                }
                else if(isContained(word,keywords,8))
                    cout<<word<<"\t\t\tKeyword"<<endl;
                else if(isContained(word,brackets,6))
                    cout<<word<<"\t\t\tBracket"<<endl;
                else if(isContained(word,operators,6))
                    cout<<word<<"\t\t\tOperator"<<endl;
                else if(isContained(word,symbols,1))
                    cout<<word<<"\t\t\tSymbol"<<endl;
                else if(isConstant(word))
                    cout<<word<<"\t\t\tConstant"<<endl;
                else if(isIdentifier(word))
                    cout<<word<<"\t\t\tIdentifier"<<endl;
                else
                    cout<<word<<"\t\t\tUndefined"<<endl;
                word.clear(); }
            else
            {
                word.push_back(line[i]);
            }}
    }
    word.clear();
    flag = false;
}
file.close();
return 0;
}


Program.cpp
#include<iostream>
using namespace std ;
int number()
{
return 5 ;
}
int main()
{
int a = 10 ;
int b = 20 ;
cout << a + b ;
return 0 ;
}


OUTPUT:
Token    Class
-----    -----
#include<iostream>    Preprocessor Directive
using    Keyword
namespace    Keyword
std    Keyword
;    Symbol
int    Keyword
number    Function
(    Bracket
)    Bracket
{    Bracket
return    Keyword
5    Constant
;    Symbol
}    Bracket
int    Keyword
main    Function
(    Bracket
)    Bracket
{    Bracket
int    Keyword
a    Identifier
=    Operator
10    Constant
;    Symbol
int    Keyword
b    Identifier
=    Operator
20    Constant
;    Symbol
cout    Keyword
<<    Operator
a    Identifier
+    Operator
b    Identifier
;    Symbol
return    Keyword
0    Constant
;    Symbol
}    Bracket

Tags: String Tokenization, C++ Program for String Tokenizer, Language Translator Programs, string tokenizer, string tokenizer C++.

1 Comments

  1. This is just the information I am finding everywhere. Thanks for your blog, I just subscribe your blog. This is a nice blog.
    https://www.wattpad.com/958163297-what-is-a-finance-contract-hire-lease-gap

    ReplyDelete
Previous Post Next Post