import System.IO import System.Exit import Data.List validTokens = ["html", "head", "body", "title", "h1", "h2", "h3", "p", "ul", "li", "a", "div", "br", "hr"] validTokens' = ["/html", "/head", "/body", "/title", "/h1", "/h2", "/h3", "/p", "/ul", "/li", "/a", "/div", "/br", "/hr"] nestables = ["h1", "h2", "h3", "p", "ul", "li", "a", "div"] nestables' = ["/h1", "/h2", "/h3", "/p", "/ul", "/li", "/a", "/div"] ignorables = ["br","hr"] main :: IO() main = do filecontent <- readFile "file.html" let fileLines = filecontent let tokenized = map cleanAttribute $ extractTokens fileLines checkTokens tokenized checkHeader tokenized let bodyTokens = isolateBody tokenized if bodyTokens == [] then errorWithoutStackTrace "Expected <body>...</body>" else checkBodyStructure (head bodyTokens) (last bodyTokens) let stack = push [] (tail $ init bodyTokens) -- push tokens onto the stack one by one if stack == [] then putStrLn "No errors detected" else putStrLn "There remmain unclosed tags" extractTokens [] = [] extractTokens ('<':xs) = let (curr,rest) = extractTokens' xs in curr:extractTokens rest extractTokens (_ :xs) = extractTokens xs extractTokens' [] = ([], []) extractTokens' ('>':xs) = ([], xs) extractTokens' (x :xs) = let (curr,rest) = extractTokens' xs in (x:curr, rest) cleanAttribute:: String -> String cleanAttribute s = fst $ break (' '==) s -- take everything from inside <tag ...> up untill the first space checkTokens:: [String] -> IO() checkTokens [] = errorWithoutStackTrace "No tags found" -- catches empty file checkTokens (x:xs) = checkTokens' (x:xs) checkTokens':: [String] -> IO() checkTokens' [] = putStrLn "Scanned all tags" checkTokens' (x:xs) = if (x `elem` validTokens || x `elem` validTokens') then checkTokens' xs else errorWithoutStackTrace $ "Invalid tag: " ++ x checkHeader :: [String] -> IO() checkHeader xs | head xs /= "html" || last xs /= "/html" = die "Expected <html> .... <html>" | not ((take 2 (tail xs)) == ["head", "/head"] || (take 4 (tail xs)) == ["head", "title", "/title", "/head"]) = die "Expected <head> </head> or <head> <title>...</title> </head>" | otherwise = putStrLn "Checked Header Section" isolateBody :: [String] -> [String] isolateBody xs = takeWhile ("/html"/=) $ dropWhile ("body"/=) xs -- gives us <body>, ...., </body> checkBodyStructure:: String -> String -> IO () checkBodyStructure x y = if (x == "body" && y == "/body") then putStrLn ("<body>...</body> as expected") else die "Expected <body>...</body>" push:: [String] -> [String] -> [String] -- <tag> will be pushed onto stack and an appropriate </tag> will collapse the stack push [] [] = [] push stack [] = stack push stack (x:xs) | x `elem` ignorables = push stack xs --i.e. ignore <hr> and <br> since they can freely appear | x `elem` nestables = if not ((x=="div"||x=="p") && ("p" `elem` stack)) then push (x:stack) xs else errorWithoutStackTrace $ "Invalid nesting for <p> with: " ++x | x `elem` nestables' = if (removeSlash x) == head stack then push (tail stack) xs else errorWithoutStackTrace $ "Invalid nesting with: " ++x --i.e wrong closer found, error | otherwise = errorWithoutStackTrace $ "Invalid Tag: " ++ x removeSlash:: String -> String --well actually removes first character, but we know it must be a nestable i.e. </tag> removeSlash (x:xs) = xs