Improve search: replace Fuse.js with MiniSearch and add category/type tagging

This commit is contained in:
Alexandra
2024-10-17 00:12:16 -06:00
parent a9d876614a
commit a55902773a
7 changed files with 154 additions and 47 deletions

4
.env
View File

@@ -1,4 +1,6 @@
PORT=8062
BIND_ADDRESS=0.0.0.0
FUZZY_LENIENCY=0.6
FUZZY_DISTANCE=0.2
MIN_MATCH=15
FORCE_FILE_REBUILD=0
DEBUG=0

View File

@@ -5,9 +5,9 @@ const { JSDOM } = jsdom
global.DOMParser = new JSDOM().window.DOMParser
export default async function getAllFiles(){
export default async function getAllFiles(catList){
const url = 'https://myrient.erista.me/files/'
let parents = await getFilesAndFolders(url)
let parents = await getFilesAndFolders(url, catList)
let dirWork = splitFilesAndFolders(parents)
let files = dirWork.files
let dirs = dirWork.directories
@@ -15,10 +15,10 @@ export default async function getAllFiles(){
if(process.env.DEBUG == '1'){
console.log(`Working on: ${dirs[0].name}`)
}
let results = await getFilesAndFolders(dirs[0].path, dirs[0].name)
let results = await getFilesAndFolders(dirs[0].path, catList, dirs[0].name)
let working = splitFilesAndFolders(results)
if(working.files.length > 0) files.push(...working.files)
if(working.directories.length > 0) dirs.push(...working.directories)
if(working.files.length > 0) {files.push(...working.files)}
if(working.directories.length > 0) {dirs.push(...working.directories)}
dirs.shift()
let dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${files.length}`
if(process.env.DEBUG == '1'){
@@ -28,10 +28,15 @@ export default async function getAllFiles(){
singleLineStatus(dirStatus)
}
}
//add IDs after
let id = 0;
for(let file in files){
files[file].id = id++
}
return files
}
async function getFilesAndFolders(url, base = ""){
async function getFilesAndFolders(url, catList, base = ""){
return fetch(url)
.then(response => {
return response.text()
@@ -51,7 +56,9 @@ async function getFilesAndFolders(url, base = ""){
foldername: base,
path: url + path,
size: size,
date: fileList[x].querySelector('.date').innerText,
category: findCategory(base + name, catList),
type: findType(base + name, catList),
date: innertext(fileList[x].querySelector('.date').innerHTML).trim(),
})
}
return fileArray
@@ -88,4 +95,43 @@ function singleLineStatus(str){
else{
console.log(str)
}
}
/**
 * Derive a display category for a file from its folder/file name.
 *
 * The longest key of `catList.Categories` contained in `str` is chosen as the
 * category. Within that category, the longest sub-category key contained in
 * `str` selects the label that is appended to the category name.
 *
 * @param {string} str - Text to inspect (the caller passes folder name + file name).
 * @param {object} catList - Category definitions; `catList.Categories` maps a
 *   category name to an object of `{ subCategoryKey: label }`.
 * @returns {string} `'Other'` when no category matches, the category name alone
 *   when no sub-category matches (or its label is empty), otherwise
 *   `'<category> <sub-category label>'`.
 */
function findCategory(str, catList){
  // Longest matching key wins so that a more specific category beats a
  // shorter one that is merely a substring of it.
  let foundCat = ''
  for(const cat in catList.Categories){
    if(cat.length > foundCat.length && str.includes(cat)){
      foundCat = cat
    }
  }
  if(!foundCat){
    return 'Other'
  }

  const subCats = catList.Categories[foundCat]
  let foundSubCat = ''
  let subCatLength = 0
  for(const subCat in subCats){
    if(subCat.length > subCatLength && str.includes(subCat)){
      // The stored value (not the key) is the label shown to the user.
      foundSubCat = subCats[subCat]
      subCatLength = subCat.length
    }
  }

  // Avoid emitting "<category> " with a dangling space when nothing matched.
  return foundSubCat ? `${foundCat} ${foundSubCat}` : foundCat
}
/**
 * Collect the type labels whose marker text occurs in a file's name.
 *
 * @param {string} str - Text to inspect (the caller passes folder name + file name).
 * @param {object} catList - Definitions object; `catList.Types` maps a marker
 *   substring to the label to display for it.
 * @returns {string} Every matching label wrapped in parentheses and separated
 *   by single spaces, in the iteration order of `catList.Types`; an empty
 *   string when nothing matches.
 */
function findType(str, catList){
  const typeMap = catList.Types
  const labels = []
  for(const marker in typeMap){
    if(!str.includes(marker)){
      continue
    }
    labels.push(`(${typeMap[marker]})`)
  }
  return labels.join(' ')
}

View File

@@ -1,15 +1,39 @@
import Fuse from 'fuse.js'
import MiniSearch from 'minisearch'
export default function fuzzySearch(fileArr, query){
const leniency = parseInt(process.env.FUZZY_LENIENCY)
const options = {
findAllMatches: true,
threshold: leniency,
//ignoreLocation: true,
includeScore: true,
//ignoreFieldNorm: true,
keys: ['filename']
/**
 * Full-text search over the crawled file list, backed by a MiniSearch index.
 *
 * Tuning is read from the environment when an instance is constructed:
 *   - FUZZY_DISTANCE: value passed to MiniSearch as the `fuzzy` search option.
 *   - MIN_MATCH: minimum score a result needs in order to be returned.
 */
export default class Searcher{
  /**
   * @param {object[]} fileArr - File records to index. MiniSearch identifies
   *   documents by their `id` field by default.
   *   NOTE(review): the index reads `name`, `category` and `type` from each
   *   record; confirm the crawler emits those property names.
   */
  constructor(fileArr){
    const distance = Number.parseFloat(process.env.FUZZY_DISTANCE)
    const minMatch = Number.parseFloat(process.env.MIN_MATCH)
    // An unset or malformed variable parses to NaN; `score >= NaN` is always
    // false, which would silently hide every result. Fall back to the values
    // shipped in the project's .env instead.
    this.distance = Number.isNaN(distance) ? 0.2 : distance
    this.minMatch = Number.isNaN(minMatch) ? 15 : minMatch
    this.createIndex(fileArr)
  }

  /**
   * Run a query against the index.
   *
   * @param {string} query - Search text.
   * @returns {{items: object[], elapsed: string}} `items` holds the results
   *   scoring at least `MIN_MATCH`; `elapsed` is the search time in seconds
   *   formatted with three decimals.
   */
  findAllMatches(query){
    const startTime = process.hrtime()
    const results = this.miniSearch.search(query, {
      filter: (result) => result.score >= this.minMatch,
    })
    const elapsed = this.parseHrtimeToSeconds(process.hrtime(startTime))
    return {
      items: results,
      elapsed: elapsed
    }
  }

  /**
   * Build (or rebuild) the MiniSearch index from a list of file records.
   *
   * @param {object[]} fileArr - File records to index.
   */
  createIndex(fileArr){
    this.miniSearch = new MiniSearch({
      fields: ['name', 'category', 'type'],
      // Only stored fields are copied onto search results. `path` is included
      // because the results view builds each download link from it.
      storeFields: ['name', 'category', 'type', 'date', 'size', 'path'],
      searchOptions: {
        boost: { name: 2 },
        fuzzy: this.distance,
      },
    })
    this.miniSearch.addAll(fileArr)
  }

  /**
   * Convert a `process.hrtime()` tuple into seconds.
   *
   * @param {[number, number]} hrtime - `[seconds, nanoseconds]`.
   * @returns {string} Seconds with three decimal places.
   */
  parseHrtimeToSeconds(hrtime){
    return (hrtime[0] + (hrtime[1] / 1e9)).toFixed(3)
  }
}

45
package-lock.json generated
View File

@@ -9,11 +9,12 @@
"ejs": "^3.1.10",
"express": "^4.21.1",
"file-older-than": "^1.0.0",
"fuse.js": "^7.0.0",
"innertext": "^1.0.3",
"jsdom": "^25.0.1",
"minisearch": "^7.1.0",
"node-cron": "^3.0.3",
"node-fetch": "^3.3.2"
"node-fetch": "^3.3.2",
"sanitize": "^2.1.2"
}
},
"node_modules/accepts": {
@@ -618,15 +619,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/fuse.js": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
"integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
"license": "Apache-2.0",
"engines": {
"node": ">=10"
}
},
"node_modules/get-intrinsic": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
@@ -921,6 +913,12 @@
}
}
},
"node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"license": "MIT"
},
"node_modules/media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
@@ -993,6 +991,12 @@
"node": "*"
}
},
"node_modules/minisearch": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/minisearch/-/minisearch-7.1.0.tgz",
"integrity": "sha512-tv7c/uefWdEhcu6hvrfTihflgeEi2tN6VV7HJnCjK6VxM75QQJh4t9FwJCsA2EsRS8LCnu3W87CuGPWMocOLCA==",
"license": "MIT"
},
"node_modules/ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
@@ -1207,6 +1211,16 @@
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
"license": "MIT"
},
"node_modules/sanitize": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/sanitize/-/sanitize-2.1.2.tgz",
"integrity": "sha512-AnH/jvL3XQDRVWE2H4E7BBpDfNTDYAX37gRhoA/Hj/8rjeOKAIiu10lpatCubWUTc9K6dCv7uK9iZQ82wGRmDA==",
"license": "ISC",
"dependencies": {
"lodash": "^4.17.0",
"validator": "^13.7.0"
}
},
"node_modules/saxes": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
@@ -1432,6 +1446,15 @@
"uuid": "dist/bin/uuid"
}
},
"node_modules/validator": {
"version": "13.12.0",
"resolved": "https://registry.npmjs.org/validator/-/validator-13.12.0.tgz",
"integrity": "sha512-c1Q0mCiPlgdTVVVIJIrBuxNicYE+t/7oKeI9MWLj3fh/uq2Pxh/3eeWbVZ4OcGW1TUf53At0njHw5SMdA3tmMg==",
"license": "MIT",
"engines": {
"node": ">= 0.10"
}
},
"node_modules/vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",

View File

@@ -4,11 +4,12 @@
"ejs": "^3.1.10",
"express": "^4.21.1",
"file-older-than": "^1.0.0",
"fuse.js": "^7.0.0",
"innertext": "^1.0.3",
"jsdom": "^25.0.1",
"minisearch": "^7.1.0",
"node-cron": "^3.0.3",
"node-fetch": "^3.3.2"
"node-fetch": "^3.3.2",
"sanitize": "^2.1.2"
},
"type": "module"
}

View File

@@ -1,31 +1,37 @@
import getAllFiles from './lib/dirwalk.js'
import getAllFiles from './lib/dircrawl.js'
import {parseJsonFile, saveJsonFile, fileExists} from './lib/loadfiles.js'
import fuzzySearch from './lib/search.js'
import Searcher from './lib/search.js'
import cron from 'node-cron'
import FileOlderThan from 'file-older-than'
import 'dotenv/config'
import express from 'express'
import http from 'http'
import sanitize from 'sanitize'
var fileListPath = './filelist.json'
var categoryListPath = './lib/categories.json'
var categoryList = await parseJsonFile(categoryListPath)
var fileList = []
async function getFilesJob(){
console.log('Updating the file list.')
fileList = await getAllFiles()
fileList = await getAllFiles(categoryList)
saveJsonFile(fileListPath, fileList)
console.log(`Finished updating file list. ${fileList.length} found.`)
}
if(!fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){
if(process.env.FORCE_FILE_REBUILD == "1" || !fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){
await getFilesJob()
}
else{
fileList = await parseJsonFile(fileListPath)
}
var search = new Searcher(fileList)
var app = express();
var server = http.createServer(app);
app.use(sanitize.middleware)
app.set('view engine', 'ejs')
app.get('/', function(req, res) {
@@ -35,7 +41,7 @@ app.get('/', function(req, res) {
})
app.get('/search', function(req, res) {
let results = fuzzySearch(fileList, req.query.q)
let results = search.findAllMatches(req.query.q)
if(process.env.DEBUG == "1"){
console.log(results)
}

View File

@@ -14,35 +14,40 @@ _ / / / _ /_/ /_ / _ / / __/ / / / /_
<button type="submit" class="btn btn-secondary ml-2">Search</button>
</div>
</div>
<p class="m-2">Found <%= results.items.length %> result<%= results.items.length != 1 ? 's': '' %> in <%= results.elapsed %> seconds.</p>
</form>
<div class="col-sm-12 w-100 mt-3">
<table class="table text-white">
<tr>
<th>Name</th>
<th>Category</th>
<th>Type</th>
<th>Size</th>
<th>Date</th>
<th>Match %</th>
<th>Search Score</th>
</tr>
<% for (let x = 0; x < results.length; x++) { %>
<% for (let x = 0; x < results.items.length; x++) { %>
<tr>
<td>
<a href="<%= results[x].item.path %>">
<%= results[x].item.filename %>
<a href="<%= results.items[x].path %>">
<%= results.items[x].name %>
</a>
</td>
<td>
<%= results.items[x].category %>
</td>
<td>
<%= results[x].item.size %>
<%= results.items[x].type %>
</td>
<td>
<%= results[x].item.date %>
<%= results.items[x].size %>
</td>
<td>
<%= (1 - results[x].score).toFixed(2) %>%
<%= results.items[x].date %>
</td>
<td>
<%= results.items[x].score.toFixed(2) %>
</td>
</tr>
<% } %>