TextDiff - A Simple C++ Text Compare Application
An article on a simple C++ text compare application

Introduction
This article is about a simple C++ text comparing application. This application performs comparison between two ANSI text files and displays the differences.
Background
This article was written for the Code Lean and Mean challenge. It shows how to implement a file-comparison application that looks for differences between two ANSI files and displays them. When the algorithm finds a difference, it searches sequentially from that point towards the end of both files, looking for the next point at which the files are equal again. The number of lines passed in each of these searches is counted, and the differing lines of the file with the smaller count are output (on the screen or to the log file).
Using the Code
Using the code is quite simple. Below is listed the whole application source:
FileComparerTest.cpp : Defines the entry point for the console application. #include "stdafx.h" #include "string.h" #include "conio.h" #define MAX_LINE_LENGTH 4096 #define OUTPUT_TYPE_SCREEN 0 #define OUTPUT_TYPE_FILE 1 void CompareFiles(char* lpszFirstFile, char* lpszSecondFile, int iOutputType); int _tmain(int argc, _TCHAR* argv[]) { // Compare files CompareFiles("\\\\pmicsnisfnp02\\Users$\\dnikoli1\\Grid\\grid1.html", "\\\\pmicsnisfnp02\\Users$\\dnikoli1\\Grid\\grid2.html", OUTPUT_TYPE_SCREEN); // Show default message printf("Press any key to continue..."); while (!_kbhit()); return 0; } void CompareFiles(char* lpszFirstFile, char* lpszSecondFile, int iOutputType) { int iCurrentLine1 = 0; int iCurrentLine2 = 0; int iTotalLines1 = 0; int iTotalLines2 = 0; int off1 = -1; int off2 = -1; char* lpszT1 = NULL; char* lpszT2 = NULL; char lpszText1[MAX_LINE_LENGTH]; char lpszText2[MAX_LINE_LENGTH]; char lpszText3[MAX_LINE_LENGTH]; char lpszText1_Temp[MAX_LINE_LENGTH]; char lpszText2_Temp[MAX_LINE_LENGTH]; char* lpszT1_Temp = NULL; char* lpszT2_Temp = NULL; // Check for valid file names if ((lpszFirstFile != NULL) && (lpszSecondFile != NULL)) { // Try to open files FILE* f1 = fopen(lpszFirstFile, "r"); FILE* f2 = fopen(lpszSecondFile, "r"); // Open log file FILE* f3 = NULL; if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("Comparing files...\n"); printf("[File1]: %s\n", lpszFirstFile); printf("[File2]: %s\n\n", lpszSecondFile); } else { f3 = fopen("Log.txt", "w"); sprintf(lpszText3, "Comparing files...\n"); fputs(lpszText3, f3); sprintf(lpszText3, "[File1]: %s\n", lpszFirstFile); fputs(lpszText3, f3); sprintf(lpszText3, "[File2]: %s\n\n", lpszSecondFile); fputs(lpszText3, f3); } // Check for valid files if ((f1 != NULL) && (f2 != NULL)) { do { // Increment line counters iCurrentLine1++; iCurrentLine2++; // Read single line of text from files lpszT1 = fgets(lpszText1, MAX_LINE_LENGTH, f1); lpszT2 = fgets(lpszText2, MAX_LINE_LENGTH, f2); // Compare lines of text if 
(strcmp(lpszText1, lpszText2) != 0) { // Scan through second file looking for equal lines iTotalLines2 = 0; off2 = ftell(f2); FILE* f2t = fopen(lpszSecondFile, "r"); fseek(f2t, off2, SEEK_SET); do { // Skip different lines iTotalLines2++; lpszT2_Temp = fgets(lpszText2_Temp, MAX_LINE_LENGTH, f2t); } while ((lpszT2_Temp != NULL) && ((strcmp(lpszText1, lpszText2_Temp) != 0))); fclose(f2t); // Scan through first file looking for equal lines iTotalLines1 = 0; off1 = ftell(f1); FILE* f1t = fopen(lpszFirstFile, "r"); fseek(f1t, off1, SEEK_SET); do { // Skip different lines iTotalLines1++; lpszT1_Temp = fgets(lpszText1_Temp, MAX_LINE_LENGTH, f1t); } while ((lpszT1_Temp != NULL) && ((strcmp(lpszText2, lpszText1_Temp) != 0))); fclose(f1t); // Compare lines passed (find minimum) if ((lpszT1_Temp != NULL) || (lpszT2_Temp != NULL)) { if (iTotalLines2 < iTotalLines1) { // Scan through second file looking for equal lines if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("\n******************************\n"); printf("* StartOfSection *\n"); printf("******************************\n"); printf("\t\t[File2, %d]: %s", iCurrentLine2, lpszText2); } else { sprintf(lpszText3, "\n******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* StartOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "\t\t[File2, %d]: %s", iCurrentLine2, lpszText2); fputs(lpszText3, f3); } off2 = ftell(f2); FILE* f2t = fopen(lpszSecondFile, "r"); fseek(f2t, off2, SEEK_SET); for (int i=0; i<iTotalLines2-1; i++) { fgets(lpszText2_Temp, MAX_LINE_LENGTH, f2t); if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("\t\t[File2, %d]: %s", iCurrentLine2+i+1, lpszText2_Temp); } else { sprintf(lpszText3, "\t\t[File2, %d]: %s", iCurrentLine2+i+1, lpszText2_Temp); fputs(lpszText3, f3); } } fgets(lpszText2_Temp, MAX_LINE_LENGTH, f2t); off2 = ftell(f2t); fseek(f2, off2, SEEK_SET); fclose(f2t); iCurrentLine2 += iTotalLines2; if 
(iOutputType == OUTPUT_TYPE_SCREEN) { printf("******************************\n"); printf("* EndOfSection *\n"); printf("******************************\n\n"); } else { sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* EndOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n\n"); fputs(lpszText3, f3); } } else { // Scan through first file looking for equal lines if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("\n******************************\n"); printf("* StartOfSection *\n"); printf("******************************\n"); printf("\t\t[File1, %d]: %s", iCurrentLine1, lpszText1); } else { sprintf(lpszText3, "\n******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* StartOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "\t\t[File1, %d]: %s", iCurrentLine1, lpszText1); fputs(lpszText3, f3); } off1 = ftell(f1); FILE* f1t = fopen(lpszFirstFile, "r"); fseek(f1t, off1, SEEK_SET); for (int i=0; i<iTotalLines1-1; i++) { fgets(lpszText1_Temp, MAX_LINE_LENGTH, f1t); if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("\t\t[File1, %d]: %s", iCurrentLine1+i+1, lpszText1_Temp); } else { sprintf(lpszText3, "\t\t[File1, %d]: %s", iCurrentLine1+i+1, lpszText1_Temp); fputs(lpszText3, f3); } } fgets(lpszText1_Temp, MAX_LINE_LENGTH, f1t); off1 = ftell(f1t); fseek(f1, off1, SEEK_SET); fclose(f1t); iCurrentLine1 += iTotalLines1; if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("******************************\n"); printf("* EndOfSection *\n"); printf("******************************\n\n"); } else { sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* EndOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n\n"); fputs(lpszText3, f3); } } } else { if (iOutputType == OUTPUT_TYPE_SCREEN) { 
printf("\n******************************\n"); printf("* StartOfSection *\n"); printf("******************************\n"); printf("\t\t[File1, %d]: %s", iCurrentLine1, lpszText1); printf("\t\t[File2, %d]: %s", iCurrentLine2, lpszText2); printf("******************************\n"); printf("* EndOfSection *\n"); printf("******************************\n\n"); } else { sprintf(lpszText3, "\n******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* StartOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "\t\t[File1, %d]: %s", iCurrentLine1, lpszText1); fputs(lpszText3, f3); sprintf(lpszText3, "\t\t[File2, %d]: %s", iCurrentLine2, lpszText2); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n"); fputs(lpszText3, f3); sprintf(lpszText3, "* EndOfSection *\n"); fputs(lpszText3, f3); sprintf(lpszText3, "******************************\n\n"); fputs(lpszText3, f3); } } } } while ((lpszT1 != NULL) && (lpszT2 != NULL)); // Show success if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("\nSuccess: The files compared !\n\n"); } else { fputs("\nSuccess: The files compared !\n", f3); } } else { // The first file can not be found if (f1 == NULL) { // Show error if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("Error: The first file can not be found !\n"); } else { fputs("Error: The first file can not be found !\n", f3); } } // The second file can not be found if (f2 == NULL) { // Show error if (iOutputType == OUTPUT_TYPE_SCREEN) { printf("Error: The second file can not be found !\n"); } else { fputs("Error: The second file can not be found !\n", f3); } } } // Close files fclose(f1); fclose(f2); if (iOutputType == OUTPUT_TYPE_FILE) { // Close log file fclose(f3); } } }
Points of Interest
I was wondering how the MS-DOS command FC works. This could be the answer, since the output is very similar. The total execution time is measured in milliseconds, and the memory used is just a few kilobytes.