diff --git a/.env b/.env index a11b967..f80972f 100644 --- a/.env +++ b/.env @@ -1,4 +1,6 @@ PORT=8062 BIND_ADDRESS=0.0.0.0 -FUZZY_LENIENCY=0.6 +FUZZY_DISTANCE=0.2 +MIN_MATCH=15 +FORCE_FILE_REBUILD=0 DEBUG=0 \ No newline at end of file diff --git a/lib/dirwalk.js b/lib/dircrawl.js similarity index 58% rename from lib/dirwalk.js rename to lib/dircrawl.js index 74915ff..0b7241b 100644 --- a/lib/dirwalk.js +++ b/lib/dircrawl.js @@ -5,9 +5,9 @@ const { JSDOM } = jsdom global.DOMParser = new JSDOM().window.DOMParser -export default async function getAllFiles(){ +export default async function getAllFiles(catList){ const url = 'https://myrient.erista.me/files/' - let parents = await getFilesAndFolders(url) + let parents = await getFilesAndFolders(url, catList) let dirWork = splitFilesAndFolders(parents) let files = dirWork.files let dirs = dirWork.directories @@ -15,10 +15,10 @@ export default async function getAllFiles(){ if(process.env.DEBUG == '1'){ console.log(`Working on: ${dirs[0].name}`) } - let results = await getFilesAndFolders(dirs[0].path, dirs[0].name) + let results = await getFilesAndFolders(dirs[0].path, catList, dirs[0].name) let working = splitFilesAndFolders(results) - if(working.files.length > 0) files.push(...working.files) - if(working.directories.length > 0) dirs.push(...working.directories) + if(working.files.length > 0) {files.push(...working.files)} + if(working.directories.length > 0) {dirs.push(...working.directories)} dirs.shift() let dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${files.length}` if(process.env.DEBUG == '1'){ @@ -28,10 +28,15 @@ export default async function getAllFiles(){ singleLineStatus(dirStatus) } } + //add IDs after + let id = 0; + for(let file in files){ + files[file].id = id++ + } return files } -async function getFilesAndFolders(url, base = ""){ +async function getFilesAndFolders(url, catList, base = ""){ return fetch(url) .then(response => { return response.text() @@ -51,7 +56,9 @@ async function getFilesAndFolders(url, base = ""){ foldername: base, path: url + path, size: size, - date: fileList[x].querySelector('.date').innerText, + category: findCategory(base + name, catList), + type: findType(base + name, catList), + date: innertext(fileList[x].querySelector('.date').innerHTML).trim(), }) } return fileArray @@ -88,4 +95,43 @@ function singleLineStatus(str){ else{ console.log(str) } +} + +function findCategory(str, catList){ + let foundCat = '' + let catLength = 0 + let foundSubCat = '' + let subCatLength = 0 + for(let cat in catList.Categories){ + if(str.includes(cat)){ + if(cat.length > catLength){ + foundCat = cat + catLength = cat.length + } + } + } + if(foundCat){ + for(let subCat in catList.Categories[foundCat]){ + if(str.includes(subCat)){ + if(subCat.length > subCatLength){ + foundSubCat = catList.Categories[foundCat][subCat] + subCatLength = subCat.length + } + } + } + } + else{ + return 'Other' + } + return `${foundCat} ${foundSubCat}` +} + +function findType(str, catList){ + let foundTypes = '' + for(let type in catList.Types){ + if(str.includes(type)){ + foundTypes += `(${catList.Types[type]}) ` + } + } + return foundTypes.trim() } \ No newline at end of file diff --git a/lib/search.js b/lib/search.js index ff97b0c..32efb1a 100644 --- a/lib/search.js +++ b/lib/search.js @@ -1,15 +1,39 @@ -import Fuse from 'fuse.js' +import MiniSearch from 'minisearch' -export default function fuzzySearch(fileArr, query){ - const leniency = parseInt(process.env.FUZZY_LENIENCY) - const options = { - findAllMatches: true, - threshold: leniency, - //ignoreLocation: true, - includeScore: true, - //ignoreFieldNorm: true, - keys: ['filename'] +export default class Searcher{ + constructor(fileArr){ + this.distance = parseFloat(process.env.FUZZY_DISTANCE) + this.minMatch = parseFloat(process.env.MIN_MATCH) + this.createIndex(fileArr) + } + + findAllMatches(query){ + var startTime = process.hrtime(); + let results = this.miniSearch.search(query, { + filter: (result) => { + return result.score >= this.minMatch + } + }) + var elapsed = this.parseHrtimeToSeconds(process.hrtime(startTime)); + return { + items: results, + elapsed: elapsed + } + } + + createIndex(fileArr){ + this.miniSearch = new MiniSearch({ + fields: ['name', 'category', 'type'], + storeFields: ['name', 'category', 'type', 'date', 'size'], + searchOptions: { + boost: { name: 2 }, + fuzzy: this.distance, + }, + }) + this.miniSearch.addAll(fileArr) + } + parseHrtimeToSeconds(hrtime){ + var seconds = (hrtime[0] + (hrtime[1] / 1e9)).toFixed(3); + return seconds; } - const fuse = new Fuse(fileArr, options) - return fuse.search(query) } \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 8194076..a235010 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,11 +9,12 @@ "ejs": "^3.1.10", "express": "^4.21.1", "file-older-than": "^1.0.0", - "fuse.js": "^7.0.0", "innertext": "^1.0.3", "jsdom": "^25.0.1", + "minisearch": "^7.1.0", "node-cron": "^3.0.3", - "node-fetch": "^3.3.2" + "node-fetch": "^3.3.2", + "sanitize": "^2.1.2" } }, "node_modules/accepts": { @@ -618,15 +619,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/fuse.js": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz", - "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==", - "license": "Apache-2.0", - "engines": { - "node": ">=10" - } - }, "node_modules/get-intrinsic": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", @@ -921,6 +913,12 @@ } } }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "license": "MIT" + }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -993,6 +991,12 @@ "node": "*" } }, + "node_modules/minisearch": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/minisearch/-/minisearch-7.1.0.tgz", + "integrity": "sha512-tv7c/uefWdEhcu6hvrfTihflgeEi2tN6VV7HJnCjK6VxM75QQJh4t9FwJCsA2EsRS8LCnu3W87CuGPWMocOLCA==", + "license": "MIT" + }, "node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -1207,6 +1211,16 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", "license": "MIT" }, + "node_modules/sanitize": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/sanitize/-/sanitize-2.1.2.tgz", + "integrity": "sha512-AnH/jvL3XQDRVWE2H4E7BBpDfNTDYAX37gRhoA/Hj/8rjeOKAIiu10lpatCubWUTc9K6dCv7uK9iZQ82wGRmDA==", + "license": "ISC", + "dependencies": { + "lodash": "^4.17.0", + "validator": "^13.7.0" + } + }, "node_modules/saxes": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", @@ -1432,6 +1446,15 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/validator": { + "version": "13.12.0", + "resolved": "https://registry.npmjs.org/validator/-/validator-13.12.0.tgz", + "integrity": "sha512-c1Q0mCiPlgdTVVVIJIrBuxNicYE+t/7oKeI9MWLj3fh/uq2Pxh/3eeWbVZ4OcGW1TUf53At0njHw5SMdA3tmMg==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", diff --git a/package.json b/package.json index d558ec2..049640e 100644 --- a/package.json +++ b/package.json @@ -4,11 +4,12 @@ "ejs": "^3.1.10", "express": "^4.21.1", "file-older-than": "^1.0.0", - "fuse.js": "^7.0.0", "innertext": "^1.0.3", "jsdom": "^25.0.1", + "minisearch": "^7.1.0", "node-cron": "^3.0.3", - "node-fetch": "^3.3.2" + "node-fetch": "^3.3.2", + "sanitize": "^2.1.2" }, "type": "module" } diff --git a/server.js b/server.js index af4fff7..dcb0965 100644 --- a/server.js +++ b/server.js @@ -1,31 +1,37 @@ -import getAllFiles from './lib/dirwalk.js' +import getAllFiles from './lib/dircrawl.js' import {parseJsonFile, saveJsonFile, fileExists} from './lib/loadfiles.js' -import fuzzySearch from './lib/search.js' +import Searcher from './lib/search.js' import cron from 'node-cron' import FileOlderThan from 'file-older-than' import 'dotenv/config' import express from 'express' import http from 'http' +import sanitize from 'sanitize' var fileListPath = './filelist.json' +var categoryListPath = './lib/categories.json' +var categoryList = await parseJsonFile(categoryListPath) var fileList = [] async function getFilesJob(){ console.log('Updating the file list.') - fileList = await getAllFiles() + fileList = await getAllFiles(categoryList) saveJsonFile(fileListPath, fileList) console.log(`Finished updating file list. ${fileList.length} found.`) } -if(!fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){ +if(process.env.FORCE_FILE_REBUILD == "1" || !fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){ await getFilesJob() } else{ fileList = await parseJsonFile(fileListPath) } +var search = new Searcher(fileList) + var app = express(); var server = http.createServer(app); +app.use(sanitize.middleware) app.set('view engine', 'ejs') app.get('/', function(req, res) { @@ -35,7 +41,7 @@ app.get('/', function(req, res) { }) app.get('/search', function(req, res) { - let results = fuzzySearch(fileList, req.query.q) + let results = search.findAllMatches(req.query.q) if(process.env.DEBUG == "1"){ console.log(results) } diff --git a/views/pages/results.ejs b/views/pages/results.ejs index ab7d5af..84b2a6b 100644 --- a/views/pages/results.ejs +++ b/views/pages/results.ejs @@ -14,35 +14,40 @@ _ / / / _ /_/ /_ / _ / / __/ / / / /_ +

Found <%= results.items.length %> result<%= results.items.length != 1 ? 's': '' %> in <%= results.elapsed %> seconds.

- +
+ - + - <% for (let x = 0; x < results.length; x++) { %> + <% for (let x = 0; x < results.items.length; x++) { %> + <% } %>
Name Categorytype Size DateMatch %Search Score
- - <%= results[x].item.filename %> + + <%= results.items[x].name %> - + <%= results.items[x].category %> - <%= results[x].item.size %> + <%= results.items[x].type %> - <%= results[x].item.date %> + <%= results.items[x].size %> - <%= (1 - results[x].score).toFixed(2) %>% + <%= results.items[x].date %> + + <%= results.items[x].score.toFixed(2) %>