String Tokenization |
Tokenizing a string in C++
String tokenization is simply splitting a string whenever a delimiter is encountered. In the code below, a file containing a simple C++ program is read and tokenized. The program reads every space-separated string from the file and categorizes each string or special character as a keyword, bracket, operator, symbol, constant, identifier, and so on. For example, if it reads int, float, cout, etc., it categorizes the token as a Keyword; if it reads a special character such as ; it categorizes it as a Symbol; if it reads (, ), [, ], { or } it categorizes it as a Bracket; if it reads an operator such as +, -, *, /, etc., it categorizes it as an Operator; and any other variable name is classified as an Identifier. The example program below shows the implementation of string tokenization for a program file, followed by its output.
String Tokenization Program in C++
#include<iostream>
#include<fstream>
#include<string>
#include<cctype>
using namespace std;
// Lookup tables the tokenizer searches (via isContained) to classify a token.
// Words reported with the class "Keyword". Note that this list also contains
// library names such as cout and std, not only reserved C++ keywords.
string keywords[] = {"int","float","return","double","cout","using","namespace","std"};
// Tokens reported with the class "Bracket".
string brackets[] = {"}","{","(",")","[","]"};
// Tokens reported with the class "Operator".
string operators[] = {"=","+","-","*","/","<<"};
// Tokens reported with the class "Symbol".
string symbols[] = {";"};
// Linear membership test.
// query : the token to look for.
// arr   : table of candidate strings.
// s     : number of entries of arr to examine (nothing is examined if s <= 0).
// Returns true when query compares equal to one of the first s entries.
bool isContained(string query,string arr[],int s)
{
    bool found = false;
    // The !found guard stops the scan at the first match.
    for(int idx = 0; idx < s && !found; ++idx)
    {
        found = (arr[idx] == query);
    }
    return found;
}
// Reports whether query is an unsigned integer literal.
// query : the token to test.
// Returns true only when query is non-empty and every character is a decimal
// digit. An empty token is not a constant; without the explicit check the
// digit loop would accept it vacuously.
bool isConstant(const string& query)
{
    if(query.empty())
        return false;
    for(string::size_type i=0;i<query.size();i++)
    {
        // isdigit has undefined behaviour for negative char values, so the
        // character is converted to unsigned char first.
        if(!isdigit(static_cast<unsigned char>(query[i])))
            return false;
    }
    return true;
}
// Reports whether query has the shape of a C++ identifier.
// query : the token to test.
// Returns true when query is non-empty, starts with a letter or underscore,
// and continues with letters, digits or underscores only.
bool isIdentifier(const string& query)
{
    if(query.empty())
        return false;
    // Characters are converted to unsigned char because the <cctype>
    // classifiers have undefined behaviour for negative char values.
    const unsigned char first = static_cast<unsigned char>(query[0]);
    // The first character must be positively validated; merely rejecting
    // digits would let punctuation such as "%" or "@x" through.
    if(!isalpha(first) && first != '_')
        return false;
    for(string::size_type i=1;i<query.size();i++)
    {
        const unsigned char c = static_cast<unsigned char>(query[i]);
        if(!isalnum(c) && c != '_')
            return false;
    }
    return true;
}
// Returns the class label printed for a non-empty token that is not a
// function name. The checks run in a fixed order: keyword, bracket, operator,
// symbol, constant, identifier; anything else is "Undefined".
static const char* classifyToken(const string& token)
{
    // Table lengths are derived from the arrays themselves so that adding an
    // entry to a table cannot leave a stale hard-coded count behind.
    const int keywordCount  = static_cast<int>(sizeof(keywords)/sizeof(keywords[0]));
    const int bracketCount  = static_cast<int>(sizeof(brackets)/sizeof(brackets[0]));
    const int operatorCount = static_cast<int>(sizeof(operators)/sizeof(operators[0]));
    const int symbolCount   = static_cast<int>(sizeof(symbols)/sizeof(symbols[0]));

    if(isContained(token,keywords,keywordCount))
        return "Keyword";
    if(isContained(token,brackets,bracketCount))
        return "Bracket";
    if(isContained(token,operators,operatorCount))
        return "Operator";
    if(isContained(token,symbols,symbolCount))
        return "Symbol";
    if(isConstant(token))
        return "Constant";
    if(isIdentifier(token))
        return "Identifier";
    return "Undefined";
}

// Reads Program.cpp from the current directory, splits every line into
// whitespace-separated tokens and prints a "Token / Class" table on stdout.
//
// Lines whose first character is '#' are printed whole as a preprocessor
// directive. On a line containing "()", the text before the first '(' is
// reported as a Function and the '(' itself as a Bracket.
//
// Returns 0 on success, 1 if Program.cpp cannot be opened.
int main()
{
    ifstream file("Program.cpp");
    if(!file.is_open())
    {
        // Report the failure instead of silently printing an empty table.
        cerr<<"Error: could not open Program.cpp"<<endl;
        return 1;
    }

    cout<<"Token\t\t\tClass"<<endl;
    cout<<"-----\t\t\t-----"<<endl;

    string line,word;
    while(getline(file,line))
    {
        if(!line.empty() && line[0] == '#')
        {
            cout<<line<<"\tPreprocessor Directive"<<endl;
            continue;
        }

        // A "()" anywhere on the line means the first '(' ends a function name.
        bool expectFunction = (line.find("()") != string::npos);
        word.clear();

        // Iterate one position past the last character so the end of the line
        // acts as a delimiter without appending a '\0' to the line itself
        // (which would otherwise leak into the printed output).
        const string::size_type len = line.size();
        for(string::size_type i=0;i<=len;i++)
        {
            const char ch = (i < len) ? line[i] : '\0';
            const bool openParen = expectFunction && ch == '(';
            // Any whitespace (space, tab, '\r' from CRLF files) splits tokens.
            const bool delimiter = ch == '\0' || isspace(static_cast<unsigned char>(ch));

            if(!delimiter && !openParen)
            {
                word.push_back(ch);
                continue;
            }

            if(openParen)
            {
                // Skip the name row when nothing precedes '(' (e.g. "main ()").
                if(!word.empty())
                    cout<<word<<"\t\t\tFunction"<<endl;
                cout<<"("<<"\t\t\tBracket"<<endl;
                expectFunction = false;
            }
            else if(!word.empty())
            {
                // Empty words come from blank lines or repeated whitespace
                // and are not tokens, so they are not reported.
                cout<<word<<"\t\t\t"<<classifyToken(word)<<endl;
            }
            word.clear();
        }
    }
    // The ifstream destructor closes the file.
    return 0;
}
Program.cpp
#include<iostream>
using namespace std ;
int number()
{
return 5 ;
}
int main()
{
int a = 10 ;
int b = 20 ;
cout << a + b ;
return 0 ;
}
OUTPUT:
Token Class
----- -----
#include<iostream> Preprocessor Directive
using Keyword
namespace Keyword
std Keyword
; Symbol
int Keyword
number Function
( Bracket
) Bracket
{ Bracket
return Keyword
5 Constant
; Symbol
} Bracket
int Keyword
main Function
( Bracket
) Bracket
{ Bracket
int Keyword
a Identifier
= Operator
10 Constant
; Symbol
int Keyword
b Identifier
= Operator
20 Constant
; Symbol
cout Keyword
<< Operator
a Identifier
+ Operator
b Identifier
; Symbol
return Keyword
0 Constant
; Symbol
} Bracket
You must be also searching for these programming languages :
You must be also searching for these programming languages :
Tags: String Tokenization, C++ Program for String Tokenizer, Language Translator Programs, string tokenizer, string tokenizer C++.
This is just the information I am finding everywhere. Thanks for your blog, I just subscribe your blog. This is a nice blog.
ReplyDeletehttps://www.wattpad.com/958163297-what-is-a-finance-contract-hire-lease-gap