// Myrient-Search-Engine/server.js
import getAllFiles from "./lib/dircrawl.js";
import FileHandler from "./lib/filehandler.js";
import Searcher from "./lib/search.js";
import cron from "node-cron";
import FileOlderThan from "file-older-than";
import "dotenv/config";
import express from "express";
import http from "http";
import sanitize from "sanitize";
import debugPrint from "./lib/debugprint.js";
2024-10-22 00:41:46 -06:00
let fileListPath = "./filelist.json";
let queryCountFile = "./queries.txt";
let categoryListPath = "./lib/categories.json";
let categoryList = await FileHandler.parseJsonFile(categoryListPath);
let crawlTime = 0;
let queryCount = 0;
let fileCount = 0;
let indexPage = "pages/index";
if (FileHandler.fileExists(fileListPath)) {
crawlTime = await FileHandler.fileTime(fileListPath);
}
2024-10-22 00:41:46 -06:00
if (FileHandler.fileExists(queryCountFile)) {
queryCount = parseInt(await FileHandler.readFile(queryCountFile));
}
// Fields the search index is built over and that a query may target.
const searchFields = ["filename", "category", "type", "region"];

// Search settings used when a request supplies none (or supplies invalid ones).
const defaultSettings = {
  boost: {},
  combineWith: "AND",
  fields: searchFields,
  fuzzy: 0,
  prefix: true,
};

// Programmatically set the default boosts so that adding another search field
// only requires touching `searchFields`: filename matches weigh 2, the rest 1.
for (const fieldName of searchFields) {
  defaultSettings.boost[fieldName] = fieldName === "filename" ? 2 : 1;
}
// Scratch buffer for the crawled/loaded file list; emptied again once indexed.
let fileList = [];
// Search index wrapper. Left undefined until the first index has been built so
// getFilesJob can tell "create" apart from "update".
let search;

/**
 * Crawls every category, persists the resulting file list and (re)builds the
 * search index from it.
 *
 * Side effects: writes the list to `fileListPath`, updates `fileCount` and
 * `crawlTime`, creates `search` on the first run (later runs call
 * `updateIndex` on the existing instance), and resets `fileList` to an empty
 * array afterwards.
 *
 * @returns {Promise<void>} Resolves once the index and crawl time are updated.
 */
async function getFilesJob() {
  console.log("Updating the file list.");
  fileList = await getAllFiles(categoryList);
  await FileHandler.saveJsonFile(fileListPath, fileList);
  fileCount = fileList.length;

  if (search === undefined) {
    search = new Searcher(searchFields);
    await search.createIndex(fileList);
  } else {
    await search.updateIndex(fileList);
  }

  // Drop the reference to the raw list now that it has been saved and indexed.
  fileList = [];
  crawlTime = await FileHandler.fileTime(fileListPath);
  console.log(`Finished updating file list. ${fileCount} found.`);
}
/**
 * Builds the options object handed to the page template.
 *
 * `defaultOptions` is only a snapshot taken at startup, so the statistics are
 * re-read from the live module state on every call; otherwise the query
 * counter and the crawl figures shown on a page would never change after boot.
 *
 * @param {string} page - Name of the sub-page to render.
 * @param {object} [options] - Page-specific values; may be omitted.
 * @returns {object} `page`, the page-specific values and the current
 *   `crawlTime`, `queryCount`, `fileCount` and `termCount`. As before, the
 *   shared statistics win over same-named keys in `options`.
 */
function buildOptions(page, options) {
  return {
    page,
    ...options,
    ...defaultOptions,
    crawlTime,
    queryCount,
    fileCount,
    termCount: search.miniSearch.termCount,
  };
}
// Startup: crawl from scratch when a rebuild is forced, when no file list has
// been saved yet, or when FileOlderThan reports the saved list as older than
// "1w". Otherwise load the saved list and index it directly.
if (
  process.env.FORCE_FILE_REBUILD === "1" ||
  !FileHandler.fileExists(fileListPath) ||
  FileOlderThan(fileListPath, "1w")
) {
  await getFilesJob();
} else {
  fileList = await FileHandler.parseJsonFile(fileListPath);
  fileCount = fileList.length;
  search = new Searcher(searchFields);
  await search.createIndex(fileList);
  // Drop the reference to the raw list now that it has been indexed.
  fileList = [];
}
// Statistics merged into every rendered page by buildOptions. The values are
// copied here once, at startup; later changes to crawlTime, queryCount or
// fileCount do not update this object.
const defaultOptions = {
  crawlTime,
  queryCount,
  fileCount,
  termCount: search.miniSearch.termCount,
};
// Express application served through an explicit http.Server so the listen
// address can be reported once the server is up.
const app = express();
const server = http.createServer(app);

// Run the sanitize middleware on every request and render views with EJS.
app.use(sanitize.middleware);
app.set("view engine", "ejs");
// GET / — landing page: the shared index view with the "search" sub-page.
app.get("/", (req, res) => {
  res.render(indexPage, buildOptions("search"));
});
/**
 * GET /search — runs a query against the index and renders the results page.
 *
 * Query parameters:
 *   q - the search text (an empty string when absent).
 *   s - optional search settings, sent as base64-encoded JSON.
 *
 * Settings that cannot be decoded, that are not a plain object, or whose
 * values differ in type from the matching entry in `defaultSettings` are
 * replaced by `defaultSettings`. Each request increments and persists the
 * query counter.
 */
app.get("/search", async function (req, res) {
  const query = req.query.q ? req.query.q : "";

  let settings = defaultSettings;
  if (req.query.s) {
    try {
      settings = JSON.parse(atob(req.query.s));
    } catch {
      debugPrint("Search settings corrupt, forcing default.");
      settings = defaultSettings;
    }
  }

  // JSON can legitimately decode to null, a primitive or an array; none of
  // those can carry settings, and null would throw on property access below.
  const isPlainObject =
    typeof settings === "object" &&
    settings !== null &&
    !Array.isArray(settings);
  // Every supplied setting must have the same type as its default.
  const hasTypeMismatch =
    isPlainObject &&
    Object.keys(defaultSettings).some(
      (key) =>
        typeof settings[key] !== "undefined" &&
        typeof settings[key] !== typeof defaultSettings[key]
    );
  if (!isPlainObject || hasTypeMismatch) {
    debugPrint("Search settings corrupt, forcing default.");
    settings = defaultSettings;
  }

  if (settings.combineWith !== "AND") {
    // Remove if unset to avoid crashing. Any value other than "AND" is dropped
    // as well — presumably so the searcher applies its own default combinator.
    delete settings.combineWith;
  }

  const results = await search.findAllMatches(query, settings);
  debugPrint(results);

  const options = buildOptions("results", {
    query,
    results,
    indexing: search.indexing,
  });
  res.render(indexPage, options);

  queryCount += 1;
  FileHandler.writeFile(queryCountFile, String(queryCount));
});
/**
 * GET /lucky — redirects straight to a file.
 *
 * With a query (`q`) the request is redirected to the path of the first
 * match; without a query, or when nothing matches, a random index entry is
 * used instead. Optional settings (`s`) are accepted as base64-encoded JSON,
 * the format the /search route reads, and as plain JSON, the only format this
 * route read before. Undecodable settings fall back to `defaultSettings`.
 * Each request increments and persists the query counter.
 */
app.get("/lucky", async function (req, res) {
  let topHit;

  if (req.query.q) {
    let settings = defaultSettings;
    if (req.query.s) {
      try {
        settings = JSON.parse(atob(req.query.s));
      } catch {
        try {
          settings = JSON.parse(req.query.s);
        } catch {
          debugPrint("Search settings corrupt, forcing default.");
        }
      }
      // JSON may decode to null or a primitive, which cannot hold settings.
      if (settings === null || typeof settings !== "object") {
        debugPrint("Search settings corrupt, forcing default.");
        settings = defaultSettings;
      }
    }

    const results = await search.findAllMatches(req.query.q, settings);
    debugPrint(results);
    // Matches are read from `results.items`, so emptiness has to be judged on
    // that list rather than on a `length` of the result object itself.
    topHit = results?.items?.[0];
  }

  if (topHit) {
    res.redirect(topHit.path);
  } else {
    const magicNum = Math.floor(Math.random() * search.getIndexSize());
    const luckyPath = search.findIndex(magicNum).path;
    debugPrint(`${magicNum}: ${luckyPath}`);
    res.redirect(luckyPath);
  }

  queryCount += 1;
  FileHandler.writeFile(queryCountFile, String(queryCount));
});
// GET /settings — settings page; the template receives the default search
// settings alongside the shared page options.
app.get("/settings", (req, res) => {
  const pageOptions = buildOptions("settings", { defaultSettings });
  res.render(indexPage, pageOptions);
});
// Bind to the port and address given in the environment and report the
// actual listen address once the socket is open.
server.listen(process.env.PORT, process.env.BIND_ADDRESS);
server.on("listening", () => {
  const { address, port } = server.address();
  console.log("Server started on %s:%s.", address, port);
});

console.log(`Loaded ${fileCount} known files.`);

// Schedule the crawl job with cron expression "0 0 0 * * *".
cron.schedule("0 0 0 * * *", getFilesJob);