2024-10-22 00:41:46 -06:00
|
|
|
import getAllFiles from "./lib/dircrawl.js";
|
|
|
|
|
import FileHandler from "./lib/filehandler.js";
|
|
|
|
|
import Searcher from "./lib/search.js";
|
|
|
|
|
import cron from "node-cron";
|
|
|
|
|
import FileOlderThan from "file-older-than";
|
|
|
|
|
import "dotenv/config";
|
|
|
|
|
import express from "express";
|
|
|
|
|
import http from "http";
|
|
|
|
|
import sanitize from "sanitize";
|
2024-10-22 02:15:09 -06:00
|
|
|
import debugPrint from "./lib/debugprint.js";
|
2024-10-27 01:53:57 -03:00
|
|
|
import compression from "compression";
|
|
|
|
|
import { generateAsciiArt } from './lib/asciiart.js';
|
2024-10-22 00:41:46 -06:00
|
|
|
|
2024-10-27 22:26:21 -03:00
|
|
|
// Locations of the persisted data files and the static lookup tables.
const fileListPath = "./data/filelist.json";
const queryCountFile = "./data/queries.txt";
const categoryListPath = "./lib/categories.json";
const searchAlikesPath = "./lib/searchalikes.json";

// Lookup tables, parsed once while the module loads.
const categoryList = await FileHandler.parseJsonFile(categoryListPath);
// Published on the global object; this file reads it back as the bare
// identifier `searchAlikes` (see loadFileList).
global.searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath);

// Mutable statistics shown on the rendered pages.
let crawlTime = 0;
let queryCount = 0;
let fileCount = 0;

// The single template every route renders.
const indexPage = "pages/index";
|
|
|
|
|
// Restore the statistics left behind by a previous run, when present.
if (FileHandler.fileExists(fileListPath)) {
  // Timestamp of the saved file list (presumably its modification time —
  // depends on FileHandler.fileTime).
  crawlTime = await FileHandler.fileTime(fileListPath);
}
if (FileHandler.fileExists(queryCountFile)) {
  const storedCount = Number.parseInt(
    await FileHandler.readFile(queryCountFile),
    10
  );
  // An empty or corrupt counter file parses to NaN. Keeping NaN would poison
  // every later `queryCount += 1` and be written back to disk, so the
  // existing default is kept instead.
  if (!Number.isNaN(storedCount)) {
    queryCount = storedCount;
  }
}
|
2024-10-17 02:02:07 -06:00
|
|
|
|
2024-10-22 00:41:46 -06:00
|
|
|
// Document fields handed to the Searcher and offered as search targets.
const searchFields = ["filename", "category", "type", "region"];

/**
 * Search settings used whenever a request supplies none, or supplies ones
 * that cannot be used. The object is passed to the Searcher as-is.
 */
const defaultSettings = {
  // Boosts are derived from searchFields so that adding a field only means
  // touching that array: "filename" is weighted 2, every other field 1.
  boost: Object.fromEntries(
    searchFields.map((fieldName) => [
      fieldName,
      fieldName === "filename" ? 2 : 1,
    ])
  ),
  combineWith: "AND",
  fields: searchFields,
  fuzzy: 0,
  prefix: true,
};
|
|
|
|
|
|
2024-10-22 00:41:46 -06:00
|
|
|
// Stays undefined until the first index has been built, so that code can test
// `typeof search` before the Searcher exists.
let search;

// Holds the list of files only while an index is being built from it.
let fileList = [];
|
2024-10-16 03:09:31 -06:00
|
|
|
|
2024-10-22 00:41:46 -06:00
|
|
|
/**
 * Crawls for files, saves the result to `fileListPath` and builds or updates
 * the search index from it.
 *
 * When the crawl returns nothing usable, the current index is kept. If no
 * index exists yet, the saved file list is loaded through loadFileList()
 * instead.
 *
 * Updates the module-level `search`, `fileCount` and `crawlTime`.
 *
 * @returns {Promise<void>}
 */
async function getFilesJob() {
  console.log("Updating the file list.");

  const crawledFiles = await getAllFiles(categoryList);
  if (!crawledFiles) {
    if (typeof search === "undefined") {
      // fall back to loading the list if it exists
      await loadFileList();
    }
    // The falsy crawl result is deliberately never stored in `fileList`.
    return;
  }

  fileList = crawledFiles;
  await FileHandler.saveJsonFile(fileListPath, fileList);
  fileCount = fileList.length;

  if (typeof search === "undefined") {
    // Construct the Searcher with the same arguments loadFileList() uses.
    // Without the search-alike groups, an index created by a crawl would
    // behave differently from one created from the saved list.
    search = new Searcher(searchFields, searchAlikes.StringGroups);
    await search.createIndex(fileList);
  } else {
    await search.updateIndex(fileList);
  }

  // The list is only needed while indexing; drop the reference afterwards.
  fileList = [];
  crawlTime = await FileHandler.fileTime(fileListPath);
  console.log(`Finished updating file list. ${fileCount} found.`);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Assembles the locals handed to the template for one page render.
 *
 * @param {string} page - Value stored under the `page` key.
 * @param {object} [options] - Page-specific values; may be omitted.
 * @returns {object} A new object holding `page`, then `options`, then the
 *   shared `defaultOptions`.
 */
function buildOptions(page, options) {
  // Spread order matters: defaultOptions comes last, so on a key clash the
  // shared defaults override the page-specific value.
  const locals = { page, ...options, ...defaultOptions };
  return locals;
}
|
|
|
|
|
|
2024-10-23 01:11:46 -06:00
|
|
|
/**
 * Builds a fresh search index from the file list stored at `fileListPath`.
 *
 * Replaces the module-level `search`, sets `fileCount` to the number of
 * entries read, and leaves `fileList` empty when done.
 *
 * @returns {Promise<void>}
 */
async function loadFileList() {
  const savedFiles = await FileHandler.parseJsonFile(fileListPath);
  fileList = savedFiles;
  fileCount = savedFiles.length;

  const searcher = new Searcher(searchFields, searchAlikes.StringGroups);
  search = searcher;
  await searcher.createIndex(savedFiles);

  // The list is only needed while indexing; drop the reference afterwards.
  fileList = [];
}
|
|
|
|
|
|
2024-10-22 00:41:46 -06:00
|
|
|
// Decide once at startup whether to crawl again or reuse the saved list.
// A crawl is triggered when it is forced through the environment, when no
// saved list exists, or when FileOlderThan reports the saved list as older
// than "2w" (presumably two weeks — confirm against file-older-than's docs).
// The checks short-circuit in this order, so the age check only runs on an
// existing file.
const needsRebuild =
  process.env.FORCE_FILE_REBUILD === "1" ||
  !FileHandler.fileExists(fileListPath) ||
  FileOlderThan(fileListPath, "2w");

const startupTask = needsRebuild ? getFilesJob : loadFileList;
await startupTask();
|
2024-10-16 00:52:22 -06:00
|
|
|
|
2024-10-22 00:04:07 -06:00
|
|
|
// Values shared by every rendered page; buildOptions() merges them into each
// page's locals and updateDefaults() refreshes them in place.
const defaultOptions = {
  crawlTime,
  queryCount,
  fileCount,
  termCount: search.miniSearch.termCount,
  generateAsciiArt,
};
|
2024-10-22 00:04:07 -06:00
|
|
|
|
2024-10-22 04:12:21 -06:00
|
|
|
/**
 * Copies the current module-level statistics into `defaultOptions`, so the
 * next render shows up-to-date numbers. Mutates `defaultOptions` in place.
 */
function updateDefaults() {
  Object.assign(defaultOptions, { crawlTime, queryCount, fileCount });
  // Read last, after the plain counters have already been stored.
  defaultOptions.termCount = search.miniSearch.termCount;
}
|
|
|
|
|
|
2024-10-19 00:08:34 -06:00
|
|
|
// Express application, wrapped in an explicit HTTP server.
const app = express();
const server = http.createServer(app);

// Middleware is registered in this order: input sanitizing, response
// compression, then JSON body parsing.
const middlewares = [sanitize.middleware, compression(), express.json()];
for (const middleware of middlewares) {
  app.use(middleware);
}

// Pages are rendered from EJS templates.
app.set("view engine", "ejs");
|
2024-10-22 00:41:46 -06:00
|
|
|
|
|
|
|
|
// GET / — renders the index template with `page` set to "search".
app.get("/", (req, res) => {
  res.render(indexPage, buildOptions("search"));
});
|
|
|
|
|
|
|
|
|
|
/**
 * Decodes the `s` query parameter (base64-encoded JSON) into search settings.
 *
 * Falls back to `defaultSettings` when the parameter is missing, cannot be
 * decoded or parsed, is not a plain object, or carries a value whose kind
 * differs from the matching default (for example a string where a number is
 * expected, or `null` where an object is expected).
 *
 * @param {unknown} encoded - Raw value of `req.query.s`.
 * @returns {object} The decoded settings, or `defaultSettings` itself.
 */
function resolveSearchSettings(encoded) {
  if (typeof encoded !== "string" || encoded === "") {
    return defaultSettings;
  }

  // typeof alone cannot tell null, arrays and plain objects apart.
  const kindOf = (value) => {
    if (value === null) {
      return "null";
    }
    return Array.isArray(value) ? "array" : typeof value;
  };

  let parsed;
  try {
    parsed = JSON.parse(atob(encoded));
  } catch {
    parsed = undefined;
  }

  const isUsable =
    kindOf(parsed) === "object" &&
    Object.keys(defaultSettings).every(
      (key) =>
        typeof parsed[key] === "undefined" ||
        kindOf(parsed[key]) === kindOf(defaultSettings[key])
    );

  if (!isUsable) {
    debugPrint("Search settings corrupt, forcing default.");
    return defaultSettings;
  }
  return parsed;
}

// GET /search — runs a query and renders one page of results.
//   q: search text
//   s: optional base64-encoded JSON settings
//   p: 1-based page number (defaults to 1)
app.get("/search", async function (req, res, next) {
  try {
    // Repeated or nested parameters arrive as arrays/objects; treat anything
    // that is not a plain string as absent.
    const query = typeof req.query.q === "string" ? req.query.q : "";
    const encodedSettings = typeof req.query.s === "string" ? req.query.s : "";

    const requestedPage = Number.parseInt(req.query.p, 10);
    const pageNum = requestedPage > 0 ? requestedPage : 1;

    // Each value is escaped on its own. encodeURI() would leave "&", "#",
    // "+" and "=" untouched, which breaks queries containing them and
    // corrupts base64 settings ("+" is read back as a space).
    const urlPrefix =
      `/search?s=${encodeURIComponent(encodedSettings)}` +
      `&q=${encodeURIComponent(query)}&p=`;

    // Work on a shallow copy so the delete below can never touch the shared
    // defaultSettings object.
    const settings = { ...resolveSearchSettings(encodedSettings) };
    if (settings.combineWith !== "AND") {
      delete settings.combineWith; //remove if unset to avoid crashing
    }

    const results = await search.findAllMatches(query, settings);
    debugPrint(results);

    // Only the first page of a successful search counts as a query.
    if (results.items.length && pageNum === 1) {
      queryCount += 1;
      FileHandler.writeFile(queryCountFile, String(queryCount));
      updateDefaults();
    }

    const options = buildOptions("results", {
      query,
      results,
      pageNum,
      indexing: search.indexing,
      urlPrefix,
    });
    res.render(indexPage, options);
  } catch (err) {
    // A rejected promise in an async handler is not reliably picked up by
    // Express; forward it explicitly so the request gets an error response.
    next(err);
  }
});
|
|
|
|
|
|
|
|
|
|
// GET /lucky — redirects to the top match for `q`, or to a random indexed
// file when there is no query or no match. Every call counts as a query.
app.get("/lucky", async function (req, res, next) {
  try {
    let matches = [];

    if (typeof req.query.q === "string" && req.query.q) {
      let settings = defaultSettings;
      if (req.query.s) {
        // NOTE(review): /search expects `s` as base64-encoded JSON, while
        // this route has always parsed it as raw JSON. The raw format is
        // kept here — confirm which one clients actually send.
        try {
          const parsed = JSON.parse(req.query.s);
          if (parsed !== null && typeof parsed === "object") {
            settings = parsed;
          }
        } catch {
          debugPrint("Search settings corrupt, forcing default.");
        }
      }
      const results = await search.findAllMatches(req.query.q, settings);
      debugPrint(results);
      // findAllMatches returns an object exposing `items` (as /search uses
      // it), so the matches have to be read from there, not from `.length`
      // on the result object.
      matches = results?.items ?? [];
    }

    let luckyPath;
    if (matches.length) {
      luckyPath = matches[0].path;
    } else {
      const magicNum = Math.floor(Math.random() * search.getIndexSize());
      // With an empty index there is no entry to pick.
      luckyPath = search.findIndex(magicNum)?.path;
      debugPrint(`${magicNum}: ${luckyPath}`);
    }
    // Fall back to the start page rather than redirecting to `undefined`.
    res.redirect(luckyPath ?? "/");

    queryCount += 1;
    FileHandler.writeFile(queryCountFile, String(queryCount));
    updateDefaults();
  } catch (err) {
    // Forward async failures to Express instead of leaving the request open.
    next(err);
  }
});
|
|
|
|
|
|
|
|
|
|
// GET /settings — renders the index template with `page` set to "settings"
// and passes the default search settings to the view.
app.get("/settings", (req, res) => {
  const locals = buildOptions("settings", { defaultSettings });
  res.render(indexPage, locals);
});
|
|
|
|
|
|
2024-10-25 05:37:54 -06:00
|
|
|
// POST /suggest — body: { query }. Answers with JSON { suggestions }.
app.post("/suggest", async function (req, res, next) {
  try {
    const query = req.body?.query;
    if (typeof query === "undefined") {
      // Always send a response: returning silently would leave the request
      // open until the client times out. The payload keeps the usual shape.
      res.status(400).json({ suggestions: [] });
      return;
    }

    const suggestions = await search.getSuggestions(query, defaultSettings);
    debugPrint(suggestions);

    res.setHeader('Content-Type', 'application/json');
    res.end(JSON.stringify({ suggestions }));
  } catch (err) {
    // Forward async failures to Express instead of leaving the request open.
    next(err);
  }
});
|
|
|
|
|
|
2024-10-27 01:53:57 -03:00
|
|
|
// GET /about — renders the index template with `page` set to "about".
app.get("/about", (req, res) => {
  res.render(indexPage, buildOptions("about"));
});
|
|
|
|
|
|
2024-10-22 00:41:46 -06:00
|
|
|
// Start listening on the port and address configured through the environment.
server.listen(process.env.PORT, process.env.BIND_ADDRESS);

// Report the address the server actually bound to once it is accepting
// connections.
server.on("listening", () => {
  const bound = server.address();
  console.log("Server started on %s:%s.", bound.address, bound.port);
});

console.log(`Loaded ${fileCount} known files.`);
|
|
|
|
|
|
2024-10-25 02:31:12 -06:00
|
|
|
// Scheduled re-crawl. With node-cron's six-field syntax (seconds first) the
// expression should fire at 02:30:00 on day-of-week 0 — confirm against the
// node-cron docs.
cron.schedule("0 30 2 * * 0", async () => {
  try {
    await getFilesJob();
    // Push the new crawl time and counts into the shared page options right
    // away; otherwise pages show stale numbers until the next counted query.
    updateDefaults();
  } catch (err) {
    // A failed crawl must not surface as an unhandled rejection.
    console.error("Scheduled file list update failed.", err);
  }
});
|