Click here to Skip to main content
15,895,799 members
Articles / Programming Languages / C++

PdfView - Peeking into the Internals of PDFs

Rate me:
Please Sign up or sign in to vote.
4.55/5 (27 votes)
6 Oct 2005 · 5 min read 200.7K   6.8K   137  
A utility for viewing the internal structure of PDF documents.
// BRawPdf.cpp : Defines the class behaviors for the application.
//

#include "stdafx.h"
#include "BRawPdf.h"

// Constructs a reader with no file loaded.
// All three state members are initialized so that methods reading m_dwSize or
// m_dwPos before Open() is called never see indeterminate values.
CBRawPdf::CBRawPdf()
{
	m_abFile = NULL;
	m_dwSize = 0;
	m_dwPos = 0;
}

void CBRawPdf::Close()
{
	if (m_abFile != NULL)
		delete m_abFile;
	m_abFile = NULL;
}

// Loads the whole file at strFilePath into memory and positions the cursor at
// offset 0. Any previously loaded file is released first.
//
// On failure a message box is shown and a user exception is thrown; in that
// case the object is left closed (no partially filled buffer is kept).
void CBRawPdf::Open(CString strFilePath)
{
	Close();

	CFile file;
	if (!file.Open(strFilePath, CFile::modeRead))
	{
		AfxMessageBox("Cannot open file.");
		AfxThrowUserException();
	}

	// Offsets in this class are DWORDs, so a file whose length does not fit
	// in a DWORD cannot be represented; reject it instead of truncating.
	const ULONGLONG ullLength = (ULONGLONG)file.GetLength();
	const DWORD dwSize = (DWORD)ullLength;
	if ((ULONGLONG)dwSize != ullLength)
	{
		AfxMessageBox("File is too large.");
		AfxThrowUserException();
	}

	// Read into a local buffer first and only commit it to the members once
	// the read has fully succeeded.
	BYTE* abData = new BYTE[dwSize];
	BOOL bComplete = FALSE;
	try
	{
		bComplete = (file.Read(abData, dwSize) == dwSize);
	}
	catch (...)
	{
		delete[] abData;
		throw;
	}

	if (!bComplete)
	{
		delete[] abData;
		AfxMessageBox("Cannot read file.");
		AfxThrowUserException();
	}
	file.Close();

	m_abFile = abData;
	m_dwSize = dwSize;
	m_dwPos = 0;
}

// Returns the current read offset into the loaded buffer.
DWORD CBRawPdf::GetPos()
{
	return this->m_dwPos;
}

// Moves the cursor to the last byte of the loaded buffer.
// For an empty buffer the cursor is set to 0 rather than letting the unsigned
// subtraction wrap around to 0xFFFFFFFF.
void CBRawPdf::GotoEnd()
{
	m_dwPos = (m_dwSize > 0) ? (m_dwSize - 1) : 0;
}

// Sets the current read offset to dwPos. The value is stored as given; no
// range check against the buffer size is performed here.
void CBRawPdf::GotoPos(DWORD dwPos)
{
	this->m_dwPos = dwPos;
}

// Scans forward from the current position for the first occurrence of the
// byte sequence strLine and leaves the cursor on its first byte.
//
// An empty strLine matches at the current position. If the sequence does not
// occur before the end of the buffer, the cursor is left unchanged, a message
// box is shown and a user exception is thrown (previously the scan ran past
// the end of the buffer).
void CBRawPdf::GotoLine(CString strLine)
{
	const DWORD dwLen = (DWORD)strLine.GetLength();
	if (dwLen == 0)
		return;

	if ((m_abFile != NULL) && (dwLen <= m_dwSize))
	{
		// Last offset at which a full match still fits inside the buffer.
		const DWORD dwLast = m_dwSize - dwLen;

		for (DWORD dwScan = m_dwPos; dwScan <= dwLast; dwScan++)
		{
			BOOL bOk = TRUE;
			for (DWORD i = 0; i < dwLen; i++)
			{
				// Compare as unsigned bytes so that values above 0x7F can
				// match even where char is a signed type.
				if (m_abFile[dwScan + i] != (BYTE)strLine[(int)i])
				{
					bOk = FALSE;
					break;
				}
			}
			if (bOk)
			{
				m_dwPos = dwScan;
				return;
			}
		}
	}

	AfxMessageBox("Line not found.");
	AfxThrowUserException();
}

// Returns TRUE when c equals one of the characters of strSet, FALSE otherwise.
BOOL CBRawPdf::CharInSet(char c, CString strSet)
{
	const int nCount = strSet.GetLength();
	int nIndex = 0;
	while (nIndex < nCount)
	{
		if (strSet[nIndex] == c)
			return TRUE;
		++nIndex;
	}
	return FALSE;
}

// Reads the next token starting at the current position and advances the
// cursor past it.
//
// strToken   - receives the token text (cleared first).
// bCheckLink - when TRUE, first tries to read three consecutive tokens of the
//              form "<digits...> <token> R" and, if the third is "R", returns
//              them combined as a single "<obj> <gen> R" token. Otherwise the
//              cursor is rewound and a single token is read.
//
// Returns FALSE if only whitespace ('\r', '\n', ' ') remains before the end of
// the buffer, TRUE otherwise. Shows a message box and throws a user exception
// if a "(...)" or "<...>" string runs to the end of the buffer unterminated.
//
// Tokenization rules are unchanged from the original; this version only adds
// bounds checks so that no read goes past m_dwSize.
BOOL CBRawPdf::_GetNextToken(CString& strToken, BOOL bCheckLink)
{
	strToken = "";

	// Skip White
	for (;;)
	{
		if (m_dwPos >= m_dwSize)
			return FALSE;

		if (CharInSet(m_abFile[m_dwPos], "\r\n ") == false)
			break;

		m_dwPos++;
	}

	if (bCheckLink)
	{
		// Remember where we started so a failed look-ahead can be undone.
		DWORD dwPos = m_dwPos;

		CString strObject;
		CString strGeneration;
		CString strType;
		_GetNextToken(strObject, FALSE);
		if ((strObject.GetLength() > 0) && (strObject[0] >= '0') && (strObject[0] <= '9'))
		{
			_GetNextToken(strGeneration, FALSE);
			_GetNextToken(strType, FALSE);
			if (strType == "R")
			{
				strToken.Format("%s %s R", strObject.GetBuffer(0), strGeneration.GetBuffer(0));
				return TRUE;
			}
		}
		m_dwPos = dwPos;
	}

	for(;;)
	{
		// Reaching the end of the buffer terminates the token collected so
		// far. The first pass always has m_dwPos < m_dwSize, so at least one
		// character has been consumed by the time this can trigger.
		if (m_dwPos >= m_dwSize)
			return TRUE;

		if (CharInSet(m_abFile[m_dwPos], "\r\n "))
			return TRUE;

		// The two-byte delimiters may only be probed when two bytes remain.
		const BOOL bHasPair = ((m_dwSize - m_dwPos) >= 2);

		if (bHasPair && !memcmp((BYTE*)&m_abFile[m_dwPos], "<<", 2))
		{
			if (strToken != "")
				return TRUE;
			strToken = "<<";
			m_dwPos += 2;
			return TRUE;
		}

		if (bHasPair && !memcmp((BYTE*)&m_abFile[m_dwPos], ">>", 2))
		{
			if (strToken != "")
				return TRUE;
			strToken = ">>";
			m_dwPos += 2;
			return TRUE;
		}

		// A delimiter ends a token that is already in progress; the delimiter
		// itself is left for the next call.
		if (CharInSet(m_abFile[m_dwPos], "/[])>"))
		{
			if (strToken != "")
				return TRUE;
		}
		if (m_abFile[m_dwPos] == '(')
		{
			if (strToken != "")
				return TRUE;

			strToken = "";
			DWORD dwDepth = 0;
			BOOL bBinary = FALSE;
			BOOL bUnicode = FALSE;

			// A 0xFE 0xFF marker right after '(' switches to the two-byte
			// mode below. Needs two more bytes after the current one.
			if ((m_dwSize - m_dwPos) > 2)
			{
				if ((m_abFile[m_dwPos + 1] == 0xFE) && (m_abFile[m_dwPos + 2] == 0xFF))
				{
					strToken += "(";
					bUnicode = TRUE;
					m_dwPos += 3;
				}
			}

			for (;;)
			{
				if (m_dwPos >= m_dwSize)
				{
					AfxMessageBox("Unterminated string.");
					AfxThrowUserException();
				}

				if (bUnicode)
				{
					if (m_abFile[m_dwPos] == ')')
					{
						strToken += CString((char)m_abFile[m_dwPos]);
						m_dwPos++;
						break;
					}
					// Skip the first byte of the pair; only the second byte
					// is appended below.
					m_dwPos++;
					if (m_dwPos >= m_dwSize)
					{
						AfxMessageBox("Unterminated string.");
						AfxThrowUserException();
					}
				}

				if (m_abFile[m_dwPos] > 127)
					bBinary = TRUE;

				strToken += CString((char)m_abFile[m_dwPos]);
				if (m_abFile[m_dwPos] == '(')
					dwDepth++;
				if (m_abFile[m_dwPos] == ')')
				{
					dwDepth--;
					// Once a byte above 127 has been seen, the first ')' ends
					// the string regardless of nesting depth.
					if ((dwDepth == 0) || bBinary)
					{
						m_dwPos++;
						break;
					}
				}
				m_dwPos++;
			}
			return TRUE;
		}

		if (m_abFile[m_dwPos] == '<')
		{
			if (strToken != "")
				return TRUE;

			strToken = "";
			DWORD dwDepth = 0;
			for (;;)
			{
				if (m_dwPos >= m_dwSize)
				{
					AfxMessageBox("Unterminated string.");
					AfxThrowUserException();
				}

				strToken += CString((char)m_abFile[m_dwPos]);
				if (m_abFile[m_dwPos] == '<')
					dwDepth++;
				if (m_abFile[m_dwPos] == '>')
				{
					dwDepth--;
					if (dwDepth == 0)
					{
						m_dwPos++;
						break;
					}
				}
				m_dwPos++;
			}
			return TRUE;
		}

		strToken += CString((char)m_abFile[m_dwPos]);
		// Bracket characters reaching this point form a one-character token.
		if (CharInSet(m_abFile[m_dwPos], "[]()<>"))
		{
			m_dwPos++;
			return TRUE;
		}
		m_dwPos++;
	}
}

// Reads the next token into strToken with the "<obj> <gen> R" reference
// look-ahead enabled. The success flag of the underlying reader is discarded.
void CBRawPdf::GetNextToken(CString& strToken)
{
	(void)_GetNextToken(strToken, TRUE);
}

// Consumes the next token and verifies that it equals strExpectedToken.
// On mismatch a message box is shown and a user exception is thrown.
void CBRawPdf::AssertNextToken(CString strExpectedToken)
{
	CString strActual;
	GetNextToken(strActual);

	if (strActual != strExpectedToken)
	{
		AfxMessageBox("Unexpected token.");
		AfxThrowUserException();
	}
}

// Reads characters from the current position up to the next line terminator
// ("\r\n", '\r' or '\n') into strLine and moves the cursor past the terminator.
//
// Returns TRUE when a terminator was found. Returns FALSE when the end of the
// buffer is reached first; strLine then holds whatever was read up to the end.
BOOL CBRawPdf::GetNextLine(CString& strLine)
{
	strLine = "";
	for (;;)
	{
		if (m_dwPos >= m_dwSize)
			return FALSE;

		// Only look at the following byte when it exists, so a '\r' that is
		// the very last byte of the buffer does not cause an out-of-range read.
		if ((m_abFile[m_dwPos] == '\r') && (m_dwPos + 1 < m_dwSize) && (m_abFile[m_dwPos + 1] == '\n'))
		{
			m_dwPos += 2;
			break;
		}
		else if ((m_abFile[m_dwPos] == '\r') || (m_abFile[m_dwPos] == '\n'))
		{
			m_dwPos++;
			break;
		}
		strLine += m_abFile[m_dwPos];
		m_dwPos++;
	}
	return TRUE;
}

// Consumes the next line and verifies that it equals strExpectedLine.
// On mismatch a message box is shown and a user exception is thrown.
void CBRawPdf::AssertNextLine(CString strExpectedLine)
{
	CString strActual;
	GetNextLine(strActual);

	if (strActual != strExpectedLine)
	{
		AfxMessageBox("Unexpected line.");
		AfxThrowUserException();
	}
}

// Reads the line that ends at (or just before) the current position, walking
// backwards through the buffer.
//
// If the cursor sits on a line terminator ("\r\n" ending at the cursor, or a
// single '\r' / '\n'), that terminator is stepped over first. Characters are
// then collected backwards into strLine until the previous terminator is met;
// the cursor is left on that terminator. If the start of the buffer is reached
// instead, the cursor is left at offset 0.
//
// With nothing loaded or an empty buffer, strLine is returned empty. A cursor
// at or beyond the end of the buffer is treated as being on the last byte.
void CBRawPdf::GetPreviousLine(CString& strLine)
{
	strLine = "";
	if ((m_abFile == NULL) || (m_dwSize == 0))
		return;
	if (m_dwPos >= m_dwSize)
		m_dwPos = m_dwSize - 1;

	// How many bytes of line terminator the cursor is currently sitting on.
	DWORD dwSkip = 0;
	if ((m_dwPos >= 1) && (m_abFile[m_dwPos - 1] == '\r') && (m_abFile[m_dwPos] == '\n'))
		dwSkip = 2;
	else if ((m_abFile[m_dwPos] == '\r') || (m_abFile[m_dwPos] == '\n'))
		dwSkip = 1;

	// m_dwPos is unsigned: stepping back past offset 0 would wrap around, so
	// stop at the start of the buffer with an empty line instead.
	if (dwSkip > m_dwPos)
	{
		m_dwPos = 0;
		return;
	}
	m_dwPos -= dwSkip;

	for (;;)
	{
		// Either terminator byte ends the line; this also covers the "\r\n"
		// case, whose '\n' is met first when walking backwards.
		if ((m_abFile[m_dwPos] == '\r') || (m_abFile[m_dwPos] == '\n'))
			break;

		strLine = CString((char)m_abFile[m_dwPos]) + strLine;

		if (m_dwPos == 0)
			break;
		m_dwPos--;
	}
}

// Reads the previous line and verifies that it equals strExpectedLine.
// On mismatch a message box is shown and a user exception is thrown.
void CBRawPdf::AssertPreviousLine(CString strExpectedLine)
{
	CString strActual;
	GetPreviousLine(strActual);

	if (strActual != strExpectedLine)
	{
		AfxMessageBox("Unexpected line.");
		AfxThrowUserException();
	}
}

// Returns TRUE while a file buffer is held, FALSE otherwise.
BOOL CBRawPdf::IsOpen()
{
	return (m_abFile != NULL) ? TRUE : FALSE;
}

// Copies dwSize bytes starting at offset dwPos of the loaded buffer into
// pabBuffer.
//
// Returns TRUE on success. Returns FALSE, without copying anything, when no
// file is loaded, pabBuffer is NULL, or the requested range does not lie
// entirely inside the buffer.
BOOL CBRawPdf::GetMem(BYTE* pabBuffer, DWORD dwPos, DWORD dwSize)
{
	if ((pabBuffer == NULL) || (m_abFile == NULL))
		return FALSE;

	// Written as a subtraction so that dwPos + dwSize cannot overflow.
	if ((dwPos > m_dwSize) || (dwSize > m_dwSize - dwPos))
		return FALSE;

	::memcpy(pabBuffer, &m_abFile[dwPos], dwSize);
	return TRUE;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Turkey Turkey
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions