mirror of
https://github.com/alexankitty/Myrient-Search-Engine.git
synced 2026-01-15 08:23:18 -03:00
Improve search: replace Fuse.js with a prebuilt MiniSearch index and tag each file with a category and type
This commit is contained in:
4
.env
4
.env
@@ -1,4 +1,6 @@
|
||||
PORT=8062
|
||||
BIND_ADDRESS=0.0.0.0
|
||||
FUZZY_LENIENCY=0.6
|
||||
FUZZY_DISTANCE=0.2
|
||||
MIN_MATCH=15
|
||||
FORCE_FILE_REBUILD=0
|
||||
DEBUG=0
|
||||
@@ -5,9 +5,9 @@ const { JSDOM } = jsdom
|
||||
global.DOMParser = new JSDOM().window.DOMParser
|
||||
|
||||
|
||||
export default async function getAllFiles(){
|
||||
export default async function getAllFiles(catList){
|
||||
const url = 'https://myrient.erista.me/files/'
|
||||
let parents = await getFilesAndFolders(url)
|
||||
let parents = await getFilesAndFolders(url, catList)
|
||||
let dirWork = splitFilesAndFolders(parents)
|
||||
let files = dirWork.files
|
||||
let dirs = dirWork.directories
|
||||
@@ -15,10 +15,10 @@ export default async function getAllFiles(){
|
||||
if(process.env.DEBUG == '1'){
|
||||
console.log(`Working on: ${dirs[0].name}`)
|
||||
}
|
||||
let results = await getFilesAndFolders(dirs[0].path, dirs[0].name)
|
||||
let results = await getFilesAndFolders(dirs[0].path, catList, dirs[0].name)
|
||||
let working = splitFilesAndFolders(results)
|
||||
if(working.files.length > 0) files.push(...working.files)
|
||||
if(working.directories.length > 0) dirs.push(...working.directories)
|
||||
if(working.files.length > 0) {files.push(...working.files)}
|
||||
if(working.directories.length > 0) {dirs.push(...working.directories)}
|
||||
dirs.shift()
|
||||
let dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${files.length}`
|
||||
if(process.env.DEBUG == '1'){
|
||||
@@ -28,10 +28,15 @@ export default async function getAllFiles(){
|
||||
singleLineStatus(dirStatus)
|
||||
}
|
||||
}
|
||||
//add IDs after
|
||||
let id = 0;
|
||||
for(let file in files){
|
||||
files[file].id = id++
|
||||
}
|
||||
return files
|
||||
}
|
||||
|
||||
async function getFilesAndFolders(url, base = ""){
|
||||
async function getFilesAndFolders(url, catList, base = ""){
|
||||
return fetch(url)
|
||||
.then(response => {
|
||||
return response.text()
|
||||
@@ -51,7 +56,9 @@ async function getFilesAndFolders(url, base = ""){
|
||||
foldername: base,
|
||||
path: url + path,
|
||||
size: size,
|
||||
date: fileList[x].querySelector('.date').innerText,
|
||||
category: findCategory(base + name, catList),
|
||||
type: findType(base + name, catList),
|
||||
date: innertext(fileList[x].querySelector('.date').innerHTML).trim(),
|
||||
})
|
||||
}
|
||||
return fileArray
|
||||
@@ -88,4 +95,43 @@ function singleLineStatus(str){
|
||||
else{
|
||||
console.log(str)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Derive a display category for a file from its folder and file name.
 *
 * The longest key of `catList.Categories` that occurs in `str` is taken as
 * the category. Within that category, the longest subcategory key that
 * occurs in `str` selects the subcategory label (the value stored under
 * that key).
 *
 * @param {string} str - Folder name and file name concatenated.
 * @param {object} catList - Parsed category definitions; `Categories` maps a
 *   category name to an object of `{ subcategoryKey: label }`.
 * @returns {string} `"<category> <label>"`, just `"<category>"` when no
 *   subcategory matches, or `'Other'` when no category matches.
 */
function findCategory(str, catList){
    // A missing list or missing Categories section is treated as "nothing matches".
    const categories = catList?.Categories ?? {}

    // Longest matching key wins so that a more specific category name beats
    // a shorter name it happens to contain.
    let foundCat = ''
    for(const cat of Object.keys(categories)){
        if(cat.length > foundCat.length && str.includes(cat)){
            foundCat = cat
        }
    }
    if(!foundCat){
        return 'Other'
    }

    let foundSubCat = ''
    let subCatLength = 0
    for(const [subCat, label] of Object.entries(categories[foundCat] ?? {})){
        if(subCat.length > subCatLength && str.includes(subCat)){
            foundSubCat = label
            subCatLength = subCat.length
        }
    }

    // Only append the label when a subcategory matched; otherwise the result
    // would carry a dangling trailing space.
    return subCatLength > 0 ? `${foundCat} ${foundSubCat}` : foundCat
}
|
||||
|
||||
/**
 * Collect the type tags that apply to a file.
 *
 * Every key of `catList.Types` that occurs in `str` contributes its mapped
 * label, wrapped in parentheses. Tags appear in the enumeration order of
 * `catList.Types` and are separated by single spaces.
 *
 * @param {string} str - Folder name and file name concatenated.
 * @param {object} catList - Parsed category definitions; `Types` maps a
 *   substring to look for to the label to emit.
 * @returns {string} Space-separated `(label)` tags, or `''` when none match.
 */
function findType(str, catList){
    const tags = []
    for(const key in catList.Types){
        if(!str.includes(key)){
            continue
        }
        tags.push(`(${catList.Types[key]})`)
    }
    return tags.join(' ')
}
|
||||
@@ -1,15 +1,39 @@
|
||||
import Fuse from 'fuse.js'
|
||||
import MiniSearch from 'minisearch'
|
||||
|
||||
export default function fuzzySearch(fileArr, query){
|
||||
const leniency = parseInt(process.env.FUZZY_LENIENCY)
|
||||
const options = {
|
||||
findAllMatches: true,
|
||||
threshold: leniency,
|
||||
//ignoreLocation: true,
|
||||
includeScore: true,
|
||||
//ignoreFieldNorm: true,
|
||||
keys: ['filename']
|
||||
/**
 * Full-text searcher over the crawled file list, backed by a MiniSearch
 * index that is built once at construction time.
 *
 * Tuning is read from the environment:
 *   FUZZY_DISTANCE - value passed to MiniSearch's `fuzzy` search option
 *   MIN_MATCH      - minimum result score for a hit to be returned
 */
export default class Searcher{
    /**
     * @param {object[]} fileArr - File records to index. Each record is handed
     *   to MiniSearch as-is (presumably with a unique `id` — verify against
     *   the crawler that produces the list).
     */
    constructor(fileArr){
        // parseFloat yields NaN for an unset or malformed variable, and a NaN
        // minMatch would make the score filter reject every hit. Fall back
        // to the values shipped in the project's .env instead.
        const readNumber = (raw, fallback) => {
            const parsed = parseFloat(raw)
            return Number.isNaN(parsed) ? fallback : parsed
        }
        this.distance = readNumber(process.env.FUZZY_DISTANCE, 0.2)
        this.minMatch = readNumber(process.env.MIN_MATCH, 15)
        this.createIndex(fileArr)
    }

    /**
     * Run a search and time it.
     *
     * @param {string} query - Search text. A missing query (null/undefined,
     *   e.g. a request without `q`) yields an empty result list instead of
     *   being passed on to MiniSearch.
     * @returns {{items: object[], elapsed: string}} Hits whose score is at
     *   least `minMatch`, and the elapsed time in seconds with three decimals.
     */
    findAllMatches(query){
        const startTime = process.hrtime()
        const results = query == null ? [] : this.miniSearch.search(query, {
            filter: (result) => result.score >= this.minMatch
        })
        const elapsed = this.parseHrtimeToSeconds(process.hrtime(startTime))
        return {
            items: results,
            elapsed: elapsed
        }
    }

    /**
     * Build the MiniSearch index over `fileArr` and store it on
     * `this.miniSearch`.
     *
     * @param {object[]} fileArr - File records to index.
     */
    createIndex(fileArr){
        this.miniSearch = new MiniSearch({
            fields: ['name', 'category', 'type'],
            // Search results only carry stored fields; `path` is needed
            // because the results page links each hit to its path.
            storeFields: ['name', 'category', 'type', 'date', 'size', 'path'],
            searchOptions: {
                boost: { name: 2 },
                fuzzy: this.distance,
            },
        })
        this.miniSearch.addAll(fileArr)
    }

    /**
     * Convert a `process.hrtime()` tuple to seconds.
     *
     * @param {[number, number]} hrtime - `[seconds, nanoseconds]`.
     * @returns {string} Seconds formatted with three decimal places.
     */
    parseHrtimeToSeconds(hrtime){
        const [seconds, nanoseconds] = hrtime
        return (seconds + (nanoseconds / 1e9)).toFixed(3)
    }
}
|
||||
45
package-lock.json
generated
45
package-lock.json
generated
@@ -9,11 +9,12 @@
|
||||
"ejs": "^3.1.10",
|
||||
"express": "^4.21.1",
|
||||
"file-older-than": "^1.0.0",
|
||||
"fuse.js": "^7.0.0",
|
||||
"innertext": "^1.0.3",
|
||||
"jsdom": "^25.0.1",
|
||||
"minisearch": "^7.1.0",
|
||||
"node-cron": "^3.0.3",
|
||||
"node-fetch": "^3.3.2"
|
||||
"node-fetch": "^3.3.2",
|
||||
"sanitize": "^2.1.2"
|
||||
}
|
||||
},
|
||||
"node_modules/accepts": {
|
||||
@@ -618,15 +619,6 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/fuse.js": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
|
||||
"integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/get-intrinsic": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
|
||||
@@ -921,6 +913,12 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.21",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/media-typer": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
|
||||
@@ -993,6 +991,12 @@
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/minisearch": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/minisearch/-/minisearch-7.1.0.tgz",
|
||||
"integrity": "sha512-tv7c/uefWdEhcu6hvrfTihflgeEi2tN6VV7HJnCjK6VxM75QQJh4t9FwJCsA2EsRS8LCnu3W87CuGPWMocOLCA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
@@ -1207,6 +1211,16 @@
|
||||
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/sanitize": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/sanitize/-/sanitize-2.1.2.tgz",
|
||||
"integrity": "sha512-AnH/jvL3XQDRVWE2H4E7BBpDfNTDYAX37gRhoA/Hj/8rjeOKAIiu10lpatCubWUTc9K6dCv7uK9iZQ82wGRmDA==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.0",
|
||||
"validator": "^13.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/saxes": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
|
||||
@@ -1432,6 +1446,15 @@
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/validator": {
|
||||
"version": "13.12.0",
|
||||
"resolved": "https://registry.npmjs.org/validator/-/validator-13.12.0.tgz",
|
||||
"integrity": "sha512-c1Q0mCiPlgdTVVVIJIrBuxNicYE+t/7oKeI9MWLj3fh/uq2Pxh/3eeWbVZ4OcGW1TUf53At0njHw5SMdA3tmMg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/vary": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
|
||||
@@ -4,11 +4,12 @@
|
||||
"ejs": "^3.1.10",
|
||||
"express": "^4.21.1",
|
||||
"file-older-than": "^1.0.0",
|
||||
"fuse.js": "^7.0.0",
|
||||
"innertext": "^1.0.3",
|
||||
"jsdom": "^25.0.1",
|
||||
"minisearch": "^7.1.0",
|
||||
"node-cron": "^3.0.3",
|
||||
"node-fetch": "^3.3.2"
|
||||
"node-fetch": "^3.3.2",
|
||||
"sanitize": "^2.1.2"
|
||||
},
|
||||
"type": "module"
|
||||
}
|
||||
|
||||
16
server.js
16
server.js
@@ -1,31 +1,37 @@
|
||||
import getAllFiles from './lib/dirwalk.js'
|
||||
import getAllFiles from './lib/dircrawl.js'
|
||||
import {parseJsonFile, saveJsonFile, fileExists} from './lib/loadfiles.js'
|
||||
import fuzzySearch from './lib/search.js'
|
||||
import Searcher from './lib/search.js'
|
||||
import cron from 'node-cron'
|
||||
import FileOlderThan from 'file-older-than'
|
||||
import 'dotenv/config'
|
||||
import express from 'express'
|
||||
import http from 'http'
|
||||
import sanitize from 'sanitize'
|
||||
|
||||
var fileListPath = './filelist.json'
|
||||
var categoryListPath = './lib/categories.json'
|
||||
var categoryList = await parseJsonFile(categoryListPath)
|
||||
var fileList = []
|
||||
|
||||
async function getFilesJob(){
|
||||
console.log('Updating the file list.')
|
||||
fileList = await getAllFiles()
|
||||
fileList = await getAllFiles(categoryList)
|
||||
saveJsonFile(fileListPath, fileList)
|
||||
console.log(`Finished updating file list. ${fileList.length} found.`)
|
||||
}
|
||||
|
||||
if(!fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){
|
||||
if(process.env.FORCE_FILE_REBUILD == "1" || !fileExists(fileListPath) || FileOlderThan(fileListPath, '1d')){
|
||||
await getFilesJob()
|
||||
}
|
||||
else{
|
||||
fileList = await parseJsonFile(fileListPath)
|
||||
}
|
||||
|
||||
var search = new Searcher(fileList)
|
||||
|
||||
var app = express();
|
||||
var server = http.createServer(app);
|
||||
app.use(sanitize.middleware)
|
||||
app.set('view engine', 'ejs')
|
||||
|
||||
app.get('/', function(req, res) {
|
||||
@@ -35,7 +41,7 @@ app.get('/', function(req, res) {
|
||||
})
|
||||
|
||||
app.get('/search', function(req, res) {
|
||||
let results = fuzzySearch(fileList, req.query.q)
|
||||
let results = search.findAllMatches(req.query.q)
|
||||
if(process.env.DEBUG == "1"){
|
||||
console.log(results)
|
||||
}
|
||||
|
||||
@@ -14,35 +14,40 @@ _ / / / _ /_/ /_ / _ / / __/ / / / /_
|
||||
<button type="submit" class="btn btn-secondary ml-2">Search</button>
|
||||
</div>
|
||||
</div>
|
||||
<p class="m-2">Found <%= results.items.length %> result<%= results.items.length != 1 ? 's': '' %> in <%= results.elapsed %> seconds.</p>
|
||||
</form>
|
||||
|
||||
|
||||
<div class="col-sm-12 w-100 mt-3">
|
||||
<table class="table text-white">
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Category</th>
|
||||
<th>type</th>
|
||||
<th>Size</th>
|
||||
<th>Date</th>
|
||||
<th>Match %</th>
|
||||
<th>Search Score</th>
|
||||
</tr>
|
||||
<% for (let x = 0; x < results.length; x++) { %>
|
||||
<% for (let x = 0; x < results.items.length; x++) { %>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="<%= results[x].item.path %>">
|
||||
<%= results[x].item.filename %>
|
||||
<a href="<%= results.items[x].path %>">
|
||||
<%= results.items[x].name %>
|
||||
</a>
|
||||
</td>
|
||||
<td>
|
||||
|
||||
<%= results.items[x].category %>
|
||||
</td>
|
||||
<td>
|
||||
<%= results[x].item.size %>
|
||||
<%= results.items[x].type %>
|
||||
</td>
|
||||
<td>
|
||||
<%= results[x].item.date %>
|
||||
<%= results.items[x].size %>
|
||||
</td>
|
||||
<td>
|
||||
<%= (1 - results[x].score).toFixed(2) %>%
|
||||
<%= results.items[x].date %>
|
||||
</td>
|
||||
<td>
|
||||
<%= results.items[x].score.toFixed(2) %>
|
||||
</td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
Reference in New Issue
Block a user