/*
 * Minimal HTML tag validator.
 *
 * Reads a file (default "file.html"), splits it into tag tokens, checks the
 * fixed <html>/<head>/<body> skeleton and then verifies that the tags inside
 * <body> are known and properly nested, using a stack of open tags.
 *
 * NOTE(review): the copy of this file that was reviewed had every "<...>"
 * literal and several loop headers stripped out. Everything marked
 * NOTE(review) below was rebuilt from the surrounding code and comments and
 * must be confirmed against an undamaged copy.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Initial lengths for dynamic arrays
#define bufferLengh 255
#define initTokenArrayLength 128
#define tokenLength 255 // Potentially rework, have dynamic token lengths

// Lists of tokens
// nestables exclude <br> and <hr>, which have no closing tag.
// NOTE(review): the original initializer (twelve entries) lost its literals;
// only the eight paired body-level tags are restored here - confirm the set.
const char* const nestables[] = {"<h1>", "<h2>", "<h3>", "<p>",
                                 "<ul>", "<li>", "<a>",  "<div>"};

const char valid[] = "VALID";

// Globals, having to pass these as parameters cluttered code...
int tokenCount = 0;                               // number of tokens stored in `tokens`
int head = 0;                                     // index of the next free slot in `stack`
int currentMaxTokenCount = initTokenArrayLength;  // allocated slots in `tokens`
char** tokens;                                    // token strings, each slot tokenLength bytes
char** stack;                                     // open-tag stack; entries point into `tokens`

/* Reports an allocation failure on stderr and terminates with EXIT_FAILURE. */
void failedMemAllocation(void)
{
    fprintf(stderr, "%s", "Error allocating heap memory!\n");
    exit(EXIT_FAILURE);
}

/* Reports the input line that could not be tokenized and terminates. */
void lexingError(char* stringPtr)
{
    printf("Error Detected: %s", stringPtr);
    exit(EXIT_FAILURE);
}

/* Prints a validation error message and terminates with EXIT_FAILURE. */
void presentError(char errorMessage[])
{
    printf("Error detected: %s\n", errorMessage);
    exit(EXIT_FAILURE);
}

/* Prints the success message and terminates with EXIT_SUCCESS. */
void parsingSuccessfull(void)
{
    printf("No errors detected\n");
    exit(EXIT_SUCCESS);
}

/* Allocates the initial token array: initTokenArrayLength slots of
 * tokenLength bytes each. Terminates the program if any allocation fails. */
void prepTokenArray(void)
{
    tokens = malloc(initTokenArrayLength * sizeof *tokens);
    if (!tokens) {
        failedMemAllocation();
    }
    for (int i = 0; i < initTokenArrayLength; i++) {
        tokens[i] = malloc(tokenLength);
        if (!tokens[i]) {
            failedMemAllocation();
        }
    }
}

/* Doubles the capacity of the token array and allocates the new slots.
 * NOTE(review): the second half of this function was lost; the slot
 * allocation and the capacity update are reconstructed. */
void expandTokenArray(void)
{
    int newMax = currentMaxTokenCount * 2;
    char **tokensCopy = realloc(tokens, (size_t)newMax * sizeof *tokensCopy);
    if (!tokensCopy) {
        failedMemAllocation();
    }
    tokens = tokensCopy;
    for (int i = currentMaxTokenCount; i < newMax; i++) {
        tokens[i] = malloc(tokenLength);
        if (!tokens[i]) {
            failedMemAllocation();
        }
    }
    currentMaxTokenCount = newMax;
}

/* Appends a copy of tokenPtr to the token array, growing it when full.
 * NOTE(review): the guarding condition was lost; empty strings are assumed
 * to be skipped, since tokenize() calls this with an empty buffer for lines
 * that end outside a tag. */
void putToken(char* tokenPtr)
{
    if (tokenPtr[0] == '\0') {
        return;
    }
    if (tokenCount >= currentMaxTokenCount) {
        expandTokenArray();
    }
    // Bounded copy: each slot holds at most tokenLength bytes.
    snprintf(tokens[tokenCount], tokenLength, "%s", tokenPtr);
    tokenCount++;
}

/* Appends c to token (current length `length`) and returns the new length.
 * Reports a lexing error for `line` if the token would exceed tokenLength. */
static size_t appendChar(char *token, size_t length, char c, char *line)
{
    if (length + 1 >= tokenLength) {
        lexingError(line);
    }
    token[length] = c;
    token[length + 1] = '\0';
    return length + 1;
}

/* Splits one input string into tag tokens and stores them with putToken().
 *
 * Tokens are built char by char (strtok was not suitable). Only characters
 * between '<' and '>' are kept; attributes (everything after the first
 * whitespace inside a tag) are dropped, so "<a href=x>" becomes "<a>".
 * A '>' without a preceding '<', or a tag with attributes that never closes,
 * is a lexing error. No state is kept between calls, so a tag split across
 * two calls is not reassembled. */
void tokenize(char* stringPtr)
{
    char tempToken[tokenLength];
    size_t used = 0;
    int inTag = 0; // non-zero after '<' until the matching '>'

    tempToken[0] = '\0';

    for (size_t i = 0; stringPtr[i] != '\0'; i++) {
        char c = stringPtr[i];

        if (c == '<') {
            // NOTE(review): this branch was lost; a second '<' inside an
            // open tag is assumed to be an error - confirm.
            if (inTag) {
                lexingError(stringPtr);
            }
            inTag = 1;
            used = appendChar(tempToken, used, '<', stringPtr);
        } else if (c == '>') {
            // Cant have a > unless we saw < already
            if (!inTag) {
                lexingError(stringPtr);
            }
            used = appendChar(tempToken, used, '>', stringPtr);
            inTag = 0;
            putToken(tempToken);
            used = 0;
            tempToken[0] = '\0';
        } else if (inTag && isspace((unsigned char)c)) {
            // Cleans out attributes: skip ahead until the tag closes.
            while (stringPtr[i] != '>') {
                if (stringPtr[i] == '\0') { // Make sure a tag closes with >
                    lexingError(stringPtr);
                }
                i++;
            }
            // Add the tag excluding the attribute
            used = appendChar(tempToken, used, '>', stringPtr);
            inTag = 0;
            putToken(tempToken);
            used = 0;
            tempToken[0] = '\0';
        } else if (inTag) {
            used = appendChar(tempToken, used, c, stringPtr);
        }
    }
    putToken(tempToken); // Stores a tag left open at the end of the string, if any
}

/* Opens `filename` and feeds it to tokenize() in chunks of at most
 * bufferLengh - 1 characters. Terminates if the file cannot be opened. */
void loadFile(char* filename)
{
    FILE* filePointer;
    char buffer[bufferLengh];

    filePointer = fopen(filename, "r");
    if (!filePointer) // Check file not found
    {
        printf("\"%s\" does not exist...\n", filename);
        exit(EXIT_FAILURE);
    }
    while (fgets(buffer, bufferLengh, filePointer)) {
        tokenize(buffer);
    }
    fclose(filePointer);
}

/* Returns 1 if token number `current` equals compareTo, otherwise 0.
 * Indices outside [0, tokenCount) never match, so callers may probe
 * positions such as tokenCount-1 or current+3 on short or empty inputs. */
int checkmatch(int current, char compareTo[])
{
    if (current < 0 || current >= tokenCount) {
        return 0;
    }
    return strcmp(tokens[current], compareTo) == 0;
}

/* Returns 1 if token is one of the nestable tags or the closing form of one.
 * NOTE(review): the body was lost; matching closers by dropping the '/'
 * is reconstructed from how checkBody() and push() use this function. */
int validToken(char token[])
{
    const size_t count = sizeof nestables / sizeof nestables[0];

    if (token[0] != '<') {
        return 0;
    }
    // Compare the tag name without the leading "<" or "</".
    const char *name = (token[1] == '/') ? token + 2 : token + 1;
    for (size_t i = 0; i < count; i++) {
        if (strcmp(name, nestables[i] + 1) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Returns 1 if an unclosed <p> is currently on the stack, otherwise 0. */
int stackContainsP(void)
{
    for (int i = 0; i < head; i++) {
        if (strcmp("<p>", stack[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Allocates the tag stack. Each token adds at most one entry, so
 * tokenCount slots are enough (+1 avoids a zero-sized request).
 * NOTE(review): the original body was lost; the sizing is reconstructed. */
void prepareStack(void)
{
    stack = malloc(((size_t)tokenCount + 1) * sizeof *stack);
    if (!stack) {
        failedMemAllocation();
    }
}

/* Applies one body token to the stack.
 *
 * Opening tags are pushed. A closing tag (second character always '/')
 * collapses the stack when it closes the tag on top, i.e. <p> -> </p>.
 * NOTE(review): the mismatch handling was lost; the surviving comment
 * fragment ("... pushes onto ... (bad nesting)") suggests the unmatched
 * closer is left on the stack, so checkBody() ends with a non-empty stack.
 * A closer seen on an empty stack is handled the same way instead of
 * reading below the start of the stack. */
void push(char token[])
{
    if (token[1] != '/') {
        stack[head] = token;
        head++;
        return;
    }
    // "</name>" closes "<name>": compare everything after "</" and "<".
    if (head > 0 && strcmp(token + 2, stack[head - 1] + 1) == 0) {
        head--;
        return;
    }
    stack[head] = token; // bad nesting: keep the stray closer on the stack
    head++;
}

/* Validates the tokens between <body> (at index `current`) and </body>,
 * then terminates with the success or error message. */
void checkBody(int current)
{
    // Excludes the trailing </body> and </html>
    for (current = current + 1; current < tokenCount - 2; current++) {
        // <br> and <hr> don't have a closer
        if (checkmatch(current, "<br>") || checkmatch(current, "<hr>")) {
            continue;
        }
        if (!validToken(tokens[current])) {
            presentError("Invalid token found in body");
        } else if ((checkmatch(current, "<p>") || checkmatch(current, "<div>"))
                   && stackContainsP()) {
            // <p> cannot contain <p> or <div>
            presentError("Tried to nest invalid tags within <p>");
        }
        push(tokens[current]);
    }
    if (head == 0) {
        parsingSuccessfull(); // I.e. Stack is empty, each tag was properly nested
    } else {
        presentError("Unclosed tags found");
    }
}

/* Checks the document skeleton and hands the body over to checkBody().
 * Expected shape: <html> <head>[<title></title>]</head> <body>...</body> </html>.
 * NOTE(review): the tag text inside the three error messages is reconstructed. */
void checkTokens(void)
{
    prepareStack();

    // Checking the opening of file is bit tedious
    int current = 0;
    if (!(checkmatch(current, "<html>") && checkmatch(tokenCount - 1, "</html>"))) {
        presentError("Expected <html>...</html>");
    }
    current++;

    if (checkmatch(current, "<head>") && checkmatch(current + 1, "</head>")) {
        current += 2;
    } else if (checkmatch(current, "<head>") && checkmatch(current + 1, "<title>")
               && checkmatch(current + 2, "</title>") && checkmatch(current + 3, "</head>")) {
        current += 4;
    } else {
        presentError("Expected <head></head> or <head><title>...</title></head>");
    }

    if (checkmatch(current, "<body>") && checkmatch(current + 1, "</body>") && tokenCount == 6) {
        // Base case: <html><head></head><body></body></html>
        parsingSuccessfull();
    } else if (checkmatch(current, "<body>") && checkmatch(tokenCount - 2, "</body>")) {
        // Only occurs if above conditions met and content exists between <body>...</body>
        checkBody(current);
    } else {
        presentError("Expected <body>...</body> after <head>...</head> section");
    }
}

/* Usage: prog [file]. Validates "file.html" when no argument is given.
 * checkTokens() always terminates the process via exit(). */
int main(int argc, char *argv[])
{
    prepTokenArray();
    if (argc == 1) {
        loadFile("file.html"); // Open file.html by default
    } else if (argc == 2) {
        loadFile(argv[1]);
    } else {
        printf("Too many command line args...");
        exit(EXIT_FAILURE);
    }
    checkTokens();
    return EXIT_SUCCESS;
}