// CourseInsights / src / datasetProcessor / DatasetParser.ts
import { InsightError } from "../controller/IInsightFacade";
import InsightFacade from "../controller/InsightFacade";
import DatasetCollector from "./DatasetCollector";
import { DatasetBuilding, DatasetRoom, DatasetSection, GeoResponse } from "./DatasetSection";
import DatasetValidator from "./DatasetValidator";
import { parse } from "parse5";

/**
 * Converts raw dataset content into dataset records: JSON course files into
 * sections, and HTML tables into buildings and rooms.
 *
 * All field extraction is delegated to `DatasetCollector`; section validation
 * is delegated to `DatasetValidator`, which is constructed with the
 * `InsightFacade` handed to this parser.
 */
export default class DatasetParser {
	private insightFacade: InsightFacade;

	/**
	 * @param insightFacade - Facade instance passed on to every `DatasetValidator`
	 * this parser creates.
	 */
	constructor(insightFacade: InsightFacade) {
		this.insightFacade = insightFacade;
	}

	/**
	 * Parses course files into sections on a best-effort basis.
	 *
	 * A file is skipped when it is not valid JSON or when its parsed value has no
	 * `result` array. A section is skipped when `validateFields` rejects it. If
	 * validating or collecting a section throws, the remaining sections of that
	 * file are dropped, but sections already collected are kept.
	 *
	 * @param content - One JSON document (as text) per course file.
	 * @returns The collected sections, in file order and then section order.
	 */
	public parseDataset(content: string[]): DatasetSection[] {
		const sections: DatasetSection[] = [];

		for (const file of content) {
			try {
				const course = JSON.parse(file); // may throw on malformed JSON
				if (!Array.isArray(course?.result)) continue; // no result array: not a usable course file

				for (const section of course.result) {
					// Sections missing required fields are ignored rather than failing the file.
					if (!new DatasetValidator(this.insightFacade).validateFields(section)) continue;

					sections.push(new DatasetCollector().collectDataset(section));
				}
			} catch (_e) {
				// Deliberate best-effort: an unparsable or broken file must not fail the whole dataset.
				continue;
			}
		}

		return sections;
	}

	/**
	 * Parses the buildings table out of an HTML document and resolves a
	 * geolocation for every row concurrently.
	 *
	 * A row is skipped when its geolocation response carries an `error`, or when
	 * any extraction step for that row throws. The result keeps the order of the
	 * table rows regardless of the order in which the lookups complete.
	 *
	 * @param content - HTML text containing the buildings table.
	 * @returns One record per successfully resolved row, in table order.
	 * @throws InsightError when the table rows cannot be located or iterated.
	 */
	public async parseBuilding(content: string): Promise<DatasetBuilding[]> {
		let resolved: Array<DatasetBuilding | undefined>;

		try {
			const trTrees = this.parseHTML(content, "views-field-title");
			const collector = new DatasetCollector();

			const pending: Array<Promise<DatasetBuilding | undefined>> = trTrees.map(
				async (trTree: any): Promise<DatasetBuilding | undefined> => {
					try {
						const buildingAddress = collector.collectElementByClass(trTree, "views-field-field-building-address")[0];
						const address = collector.collectText(buildingAddress);

						const geo: GeoResponse = await collector.collectGeoResponse(address);
						if (geo.error) return undefined; // no usable coordinates for this address

						return collector.collectDatasetBuilding(address, geo, trTree);
					} catch (_err) {
						// Deliberate best-effort: one malformed row must not discard the others.
						return undefined;
					}
				}
			);

			// Promise.all yields results in input order, so rows stay in table order.
			resolved = await Promise.all(pending);
		} catch (err) {
			throw new InsightError(`parseBuilding ${err}`);
		}

		return resolved.filter((building): building is DatasetBuilding => building !== undefined);
	}

	/**
	 * Parses the rooms table out of a building's HTML document.
	 *
	 * Rows whose extraction throws are skipped. If the table rows cannot be
	 * located or iterated at all, an empty array is returned instead of throwing.
	 *
	 * @param content - HTML text containing the rooms table.
	 * @param building - Building record passed to the collector for every room.
	 * @returns One record per successfully collected row, in table order.
	 */
	public parseRoom(content: string, building: DatasetBuilding): DatasetRoom[] {
		const rooms: DatasetRoom[] = [];

		try {
			const trTrees: any[] = this.parseHTML(content, "views-field-field-room-number");

			for (const trTree of trTrees) {
				try {
					rooms.push(new DatasetCollector().collectDatasetRoom(trTree, building));
				} catch (_err) {
					// Deliberate best-effort: skip rows that cannot be turned into a room.
					continue;
				}
			}
		} catch (_err) {
			return [];
		}

		return rooms;
	}

	/**
	 * Locates the rows of the first table that contains an element with the
	 * given class.
	 *
	 * The document is parsed with parse5, the first `table` having at least one
	 * descendant of class `field` is selected, and the `tr` elements of its first
	 * `tbody` are returned.
	 *
	 * NOTE(review): when no table matches, `undefined` is handed to the collector;
	 * whether that throws or yields an empty list depends on `DatasetCollector`.
	 * Both callers in this class wrap the call in `try`.
	 *
	 * @param content - HTML text to parse.
	 * @param field - Class name that identifies the wanted table.
	 * @returns Whatever `collectElementByTag` returns for the `tr` lookup.
	 */
	public parseHTML(content: string, field: string): any {
		const htmlTree = parse(content);
		const collector = new DatasetCollector();

		// Pick the table that actually holds the requested column class.
		const tableTree = collector
			.collectElementByTag(htmlTree, "table")
			.find((table: any) => collector.collectElementByClass(table, field).length !== 0);
		const tbodyTree = collector.collectElementByTag(tableTree, "tbody")[0];

		return collector.collectElementByTag(tbodyTree, "tr");
	}
}