- regfileparser_source_no_boost.zip
- RegFileParser_source_no_boost
- src
- RegFileParser.sln
- RegFileParserAutoTest
- main.cpp
- RegFileParserAutoTest.vcproj
- TestFiles
- 0_test_simple_a.reg
- 0_test_simple_w.reg
- 1_test_key_name_a.reg
- 1_test_key_name_w.reg
- 2_test_value_name_a.reg
- 2_test_value_name_w.reg
- 3_test_value_name_default_a.reg
- 3_test_value_name_default_w.reg
- 4_test_value_big_data_a.reg
- 4_test_value_big_data_w.reg
- 5_test_value_big_string_a.reg
- 5_test_value_big_string_w.reg
- TestRunner.hpp
- RegFileParserLib
- RegFileParserTestCmd
- regfileparser_bin.zip
- RegFileParser_bin
- bin
- RegFileParserAutoTest.exe
- RegFileParserTestCmd.exe
- regfileparser_source.zip
- RegFileParser_source
- src
- boost_libs
- libboost_program_options-vc90-mt-s-1_38.lib
- libboost_program_options-vc90-mt-sgd-1_38.lib
- RegFileParser.sln
- RegFileParserAutoTest
- main.cpp
- RegFileParserAutoTest.vcproj
- TestFiles
- 0_test_simple_a.reg
- 0_test_simple_w.reg
- 1_test_key_name_a.reg
- 1_test_key_name_w.reg
- 2_test_value_name_a.reg
- 2_test_value_name_w.reg
- 3_test_value_name_default_a.reg
- 3_test_value_name_default_w.reg
- 4_test_value_big_data_a.reg
- 4_test_value_big_data_w.reg
- 5_test_value_big_string_a.reg
- 5_test_value_big_string_w.reg
- TestRunner.hpp
- RegFileParserLib
- RegFileParserTestCmd
|
#ifndef _REG_FILE_PARSER_IMPL_H_
#define _REG_FILE_PARSER_IMPL_H_
/*
---------------------------------------------------
Simple schema of .reg file data structure:
[KEY_NAME]
VALUE_NAME=DATA
Comments:
- KEY_NAME may consist of alphabetical symbols and '"','\\','[',']'
- A key can have any number of values, from 0 upwards
- VALUE_NAME can be:
- symbol '@' - it means the default value
- "text" - this "text" can contain these symbols: '\n','"','\\','[',']'
- DATA can be:
- "text" - this "text" can contain any symbols, but it always ends with "\n"
- binary:
- hex(N):XX
- hex:XX
- dword:XX
Comments:
XX is a sequence of digit pairs separated by commas (see the example below);
a line can end with the '\' symbol, which means that the data continues on the next line
Example:
dword:72,...,00,\
00,..,20
You can also read the boost::spirit info at the end of this file
*/
#include <boost/spirit/core.hpp>
#include <boost/spirit.hpp>
#include <boost/bind.hpp>
namespace reg_parser
{
// Callback interface used by the .reg parser to hand results to the caller.
//
// Every callback receives the matched text as a pair of pointers into the
// buffer being parsed: `begin` points at the first matched character and
// `end` one past the last one. The text is NOT null-terminated at `end`.
//
// charT is the character type of the parsed buffer.
template<typename charT>
struct IResultProcessor
{
    // Virtual so that implementations can be destroyed through this interface.
    virtual ~IResultProcessor() {}

    // A key name was recognised (the text between '[' and the closing ']').
    virtual void OnKeyFound(const charT* begin, const charT* end) = 0;

    // A value name was recognised: either '@' or a quoted string
    // (the surrounding quotes are part of the reported range).
    virtual void OnValueNameFound(const charT* begin, const charT* end) = 0;

    // The data belonging to the most recently reported value name was
    // recognised: either a quoted string (quotes included) or raw
    // hex/dword text.
    virtual void OnValueDataFound(const charT* begin, const charT* end) = 0;
};
template<class charT>
inline bool ParseRegFileImpl(const charT* buffer,
IResultProcessor<charT>* resultProc)
{
using namespace boost::spirit;
typedef rule<scanner<const charT*> > RuleType;
typedef chlit<charT> ch_t; // Single character
typedef chset<charT> chs_t; // Character set
typedef IResultProcessor<charT> ResProcT;
chs_t anychar_t(anychar_p); // Pattern to char set
chs_t eol_CR('\r'); // End of line CR
chs_t eol_LF('\n'); // End of line LF
chs_t eol_t(eol_CR);// CR or LF end of line
eol_t |= eol_LF;
/* ------------------------------------------------------------------------------------- */
/* Help rules*/
// Matches spaces or tabs
RuleType blanks = * blank_p;
// Symbols ']'and '[' - separate Key Name
RuleType not_name_separator = ~ch_t(']') & ~ch_t('[');
// empty data
RuleType empty_data = blanks >> (eol_t | ch_t('\0'));
// Data in what we don't interested
// @ - it's default value name, " - require additional processing
RuleType other_data = *(anychar_t & not_name_separator & ~ch_t('@')& ~ch_t('"'));
/* ------------------------------------------------------------------------------------- */
/* Rules that describe identifier of key name */
RuleType ident_kname_continue = ch_t(']') >> ~eol_t;
RuleType ident_key_name = *(anychar_t & ~ch_t(']')) || ident_kname_continue >> ident_key_name;
/* ------------------------------------------------------------------------------------- */
/* Rules that describe identifier of value name */
// Skip \" sequence
RuleType ident_vname_sz_skip = ch_t('\\') >> ch_t('"');
RuleType ident_vname_sz_impl = *(anychar_t & ~ch_t('"') || ident_vname_sz_skip );
// Add trailing symbols to match pattern
RuleType ident_vname_sz = ch_t('"') >> ident_vname_sz_impl >> +ch_t('"');
// Rule for default value name
RuleType ident_vname_def = ch_t('@');
// "text" or DEFAULT
RuleType ident_value_name = ident_vname_def | ident_vname_sz;
/* ------------------------------------------------------------------------------------- */
/* Rules that describe value data */
// if value data is binary
// Data can be any character except '\\' and '\n' in sequence
RuleType vdata_bin_body =*(anychar_t & ~ch_t('\0') & ~eol_t & ~ch_t('\\'));
RuleType vdata_bin_continue = ch_t('\\') >> eol_t;
// if '\\' and '\n' in sequence data continue on the next line
RuleType vdata_bin = vdata_bin_body >> *(vdata_bin_continue >> vdata_bin_body);
// if value data is string
// String data always end by '"' and '\n' symbols in sequence
RuleType vdata_sz_continue = +ch_t('"') >> ~eol_t;
RuleType vdata_sz_body_impl = *(anychar_t & ~ch_t('"'));
RuleType vdata_sz_body = vdata_sz_body_impl >> *(vdata_sz_continue >> vdata_sz_body_impl);
// String data always starts and end with '"' symbol
RuleType ident_vdata_sz = ch_t('"') >> vdata_sz_body >> +ch_t('"');
// Check value data format
RuleType ident_vdata = ident_vdata_sz | vdata_bin;
/* ------------------------------------------------------------------------------------- */
/* Put all rules together */
// line with key name
RuleType l_key =
other_data>> // Can be comments or start title
ch_t('[') >> // starts key name
// Call OnKeyFound function if rule succeed
ident_key_name [bind(&ResProcT::OnKeyFound, resultProc, _1,_2) ] >>
blanks >> // can be blanks
ch_t(']') >> // end key name
blanks >> // can be blanks
*eol_t; // one or more end of line symbols
// lines with value name and data
RuleType l_values =
other_data>> // Can be comments or start title
// Call OnValueNameFound function if rule succeed
ident_value_name [bind(&ResProcT::OnValueNameFound, resultProc, _1,_2) ] >>
blanks >> // can be blanks
ch_t('=') >> // always separate value name and value data
blanks >> // can be blanks
// Call OnValueDataFound function if rule succeed
ident_vdata [bind(&ResProcT::OnValueDataFound, resultProc, _1,_2) ] >>
blanks >> // can be blanks
*eol_t; // one or more end of line symbols
// Any line can satisfy one of three rules
RuleType lines = l_key | l_values | empty_data;
// Do lexeme_d pars that compare also additional symbols
// if do just *lines, symbols ' ','\t','\n' not be compared
RuleType reg_file = lexeme_d [*lines] ;
// Execute parse
return (parse(buffer, reg_file).full);
}
} //namespace reg_parser
#endif //_REG_FILE_PARSER_IMPL_H_
/*
--------------------------------------------------------------------------------
boost::spirit short info
--------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
Full info available at http://www.boost.org/ or directly
at http://www.boost.org/doc/libs/1_36_0/libs/spirit/classic/index.html
////////////////////////////////////////////////////////////////////////////////
Set operators:
a | b Union Match a or b. Also referred to as alternative
a & b Intersection Match a and b
a - b Difference Match a but not b. If both match and b's matched text
is shorter than a's matched text, a successful match is made
a ^ b XOR Match a or b, but not both
Sequencing Operators:
a >> b Sequence Match a and b in sequence
a && b Sequential-and Sequential-and. Same as above, match a and b in sequence
a || b Sequential-or Match a or b in sequence
Optional and Loops:
*a - Match a zero (0) or more times
+a - Match a one (1) or more times
!a - Match a zero (0) or one (1) time
a % b - Match a list of one or more repetitions of a separated by occurrences of b.
This is the same as a >> *(b >> a). Note that a must not also match b
Single character parsers:
anychar_p Matches any single character (including the null terminator: '\0')
alnum_p Matches alpha-numeric characters
alpha_p Matches alphabetic characters
blank_p Matches spaces or tabs
cntrl_p Matches control characters
digit_p Matches numeric digits
graph_p Matches non-space printing characters
lower_p Matches lower case letters
print_p Matches printable characters
punct_p Matches punctuation symbols
space_p Matches spaces, tabs, returns, and newlines
upper_p Matches upper case letters
xdigit_p Matches hexadecimal digits
Other comments:
negation ~
Example: ~ch_t('x') - matches any character except 'x'
---------------------------------------------------
*/
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
ApriorIT is a software research and development company specializing in cybersecurity and data management technology engineering. We work for a broad range of clients from Fortune 500 technology leaders to small innovative startups building unique solutions.
As Apriorit offers integrated research and development services for software projects in such areas as endpoint security, network security, data security, embedded systems, and virtualization, we have strong kernel and driver development skills, huge system programming expertise, and are real fans of research projects.
Our specialty is reverse engineering, we apply it for security testing and security-related projects.
A separate department of Apriorit works on large-scale business SaaS solutions, handling tasks from business analysis, data architecture design, and web development to performance optimization and DevOps.
Official site: https://www.apriorit.com
Clutch profile: https://clutch.co/profile/apriorit
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.