diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 81cfc12..86f94c7 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -9,7 +9,7 @@ services: deploy: resources: limits: - memory: 1GB + memory: 2GB ports: - "9200:9200" volumes: @@ -28,4 +28,4 @@ services: volumes: elasticsearch_data: - postgres_data: \ No newline at end of file + postgres_data: diff --git a/lib/dbkwworker.js b/lib/dbkwworker.js new file mode 100644 index 0000000..14f2d0d --- /dev/null +++ b/lib/dbkwworker.js @@ -0,0 +1,153 @@ +import { ToWords } from "to-words"; +import { getSample } from "./services/elasticsearch.js"; + +const toWords = new ToWords({ + localeCode: "en-US", + converterOptions: { + ignoreDecimal: false, + doNotAddOnly: true, + }, +}); + +function stringToWordArray(string) { + let symbolRegex = + /_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*/g; + let workingString = string.replaceAll("-", " "); + workingString = workingString.replaceAll(symbolRegex, " "); + let stringArray = workingString.split(" "); + return stringArray.filter((entry) => entry.trim() != ""); +} + +function kwProcessor(terms, kwArr) { + for (let term in terms) { + terms[term] = terms[term].toLowerCase(); + } + let foundKws = []; + + for (let word in terms) { + for (let group in kwArr) { + let currentGroup = kwArr[group]; + for (let index in currentGroup) { + if (currentGroup[index] == terms[word]) { + foundKws.push(...currentGroup); + break; + } + } + } + } + if (foundKws) return [...new Set(foundKws)]; +} + +async function getNumerals(stringArr) { + let numerals = []; + let nameWordLen = 0; + for (let word in stringArr) { + let curWord = stringArr[word]; + if (validateRomanNumeral(curWord)) { + nameWordLen = word; + let numeral = parseNumeral(curWord); + if (numeral) numerals.push(numeral); + } + } + //Guard clause, exits when we didn't find a valid numeral + if (!nameWordLen) return; + let searchQuery = stringArr.slice(0, nameWordLen).join(" ").trim(); + //Check if this is a series (Using suggestions right now as we don't need a whole lot) + //Todo: Make a custom elastic search function for this + let results = await getSample(searchQuery); + let series = false; + //always return ii if it's available + for (let x in numerals) { + if (numerals[x] == 2) return [...new Set(numerals)]; + } + if (results.length > 1) { + for (let x in results) { + let seriesNumeral = []; + let words = stringToWordArray(results[x].sample); + for (let word in words) { + let numeral = parseNumeral(words[word]); + if (numeral) seriesNumeral.push(numeral); + } + if (seriesNumeral > 0) { + for (let x in numerals) { + for (let y in seriesNumeral) { + if (numerals[x] != seriesNumeral[y]) { + series = true; + } + } + } + } + } + if (!series) return; + numerals.push(getNumberNames(numerals)); + return [...new Set(numerals)]; + } +} + +function parseNumeral(string) { + //Keep these upper case to reduce the number of false positives. Make sure the input isn't tolower + const romanNumerals = { + /*M: 1000, + CM: 900, + D: 500, + CD: 400, + C: 100, + XC: 90, + L: 50, + XL: 40,*/ + X: 10, + IX: 9, + V: 5, + IV: 4, + I: 1, + }; + if (validateRomanNumeral(string)) { + let numeralSum = 0; + string = string.toUpperCase(); + for (let numeral in romanNumerals) { + while (string.startsWith(numeral)) { + numeralSum += romanNumerals[numeral]; + string = string.substring(numeral.length); + } + } + if (string.length > 0) return 0; + return numeralSum; + } +} + +function getNumberNames(stringArr) { + let numbers = []; + for (let number in stringArr) { + let curNum = stringArr[number]; + if (/^\d+$/.test(curNum)) { + let numberName = toWords.convert(parseInt(curNum)); + if (numberName) numbers.push(numberName.trim()); + } + } + return [...new Set(numbers)]; +} + +function validateRomanNumeral(string) { + if (!string) return false; + if (string == "vim") return false; + let romanRegex = /i|v|x|l|c|d|m/gi; + return !string.replaceAll(romanRegex, ""); +} + +export async function optimizeKws(object) { + for (let column in object.keywords) { + if (!object.data[column]) continue; + let wordArr = stringToWordArray(object.data[column]); + let workKws = kwProcessor(wordArr, object.keywords[column]); + //special case for filenames + if (column == "filename") { + let numerals = await getNumerals(wordArr); + if (numerals) { + workKws.push(...numerals); + } + workKws.push(...getNumberNames(wordArr)); + } + object.data[column + "kws"] = workKws.join(" ").trim(); + } + return object.data; +} diff --git a/lib/dboptimize.js b/lib/dboptimize.js new file mode 100644 index 0000000..80c08e4 --- /dev/null +++ b/lib/dboptimize.js @@ -0,0 +1,99 @@ +import debugPrint from "./debugprint.js"; +import { bulkIndexFiles } from "./services/elasticsearch.js"; +import { File } from "./models/index.js"; +import { readFileSync } from "fs"; +import { fileURLToPath } from "url"; +import { dirname, resolve } from "path"; +import { Piscina, FixedQueue } from "piscina"; +import { timer } from "./time.js"; + +let piscina = new Piscina({ + filename: resolve("./lib", "dbkwworker.js"), + taskQueue: new FixedQueue(), +}); + +const BATCH_SIZE = 1000; +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const relatedKwRoot = "../lib/json/relatedkeywords/"; +const catKwPath = resolve(__dirname, relatedKwRoot + "categories.json"); +const nameKwpath = resolve(__dirname, relatedKwRoot + "names.json"); +const regionKwpath = resolve(__dirname, relatedKwRoot + "regions.json"); +//make sure the child object matches the column in the file db model +const keywords = { + filename: JSON.parse(readFileSync(nameKwpath, "utf8")), + category: JSON.parse(readFileSync(catKwPath, "utf8")), + subcategories: JSON.parse(readFileSync(catKwPath, "utf8")), + region: JSON.parse(readFileSync(regionKwpath, "utf8")), +}; + +export async function optimizeDatabaseKws() { + let proctime = new timer(); + let changes = 0; + console.log("Optimizing DB Keywords..."); + let dbLength = await File.count(); + let optimizeTasks = []; + let resolvedTasks = []; + for (let i = 0; i < dbLength; ) { + singleLineStatus(`Optimizing Keywords: ${i} / ${dbLength}`); + let result = await File.findAndCountAll({ + limit: BATCH_SIZE, + offset: i, + }); + for (let x = 0; x < result.rows.length; x++) { + debugPrint(`Submitting job for: ${result.rows[x]["filename"]}`); + let data = []; + for (let column in keywords) { + data[column] = result.rows[x][column]; + } + optimizeTasks.push( + piscina + .run( + { + data: data, + keywords: keywords, + }, + { name: "optimizeKws" } + ) + .catch((err) => { + console.error(err); + }) + ); + i++; + } + let settledTasks = await Promise.all(optimizeTasks); + resolvedTasks.push(...settledTasks); + debugPrint(`Resolving ${resolvedTasks.length} optimization tasks.`); + for (let y = 0; y < resolvedTasks.length; y++) { + let changed = false; + for (let column in keywords) { + if (result.rows[y][column + "kws"] == resolvedTasks[y][column + "kws"]) + continue; + result.rows[y][column + "kws"] = resolvedTasks[y][column + "kws"]; + changed = true; + } + if (changed) { + result.rows[y].save(); + changes++; + } + } + await bulkIndexFiles(result.rows); + optimizeTasks = []; + resolvedTasks = []; + } + console.log( + `\nCompleted Keyword Optimization for ${changes} row${ + changes > 1 || changes == 0 ? "s" : "" + } in ${proctime.elapsed()}.` + ); +} + +function singleLineStatus(str) { + if (process.stdout.isTTY && process.env.DEBUG != "1") { + process.stdout.clearLine(0); + process.stdout.cursorTo(0); + process.stdout.write(str); + } else { + console.log(str); + } +} diff --git a/lib/dircrawl.js b/lib/dircrawl.js index 2ba25a7..3db52e5 100644 --- a/lib/dircrawl.js +++ b/lib/dircrawl.js @@ -4,6 +4,8 @@ import { resolve } from "path"; import debugPrint from "./debugprint.js"; import { File } from './models/index.js'; import { bulkIndexFiles } from './services/elasticsearch.js'; +import { optimizeDatabaseKws } from "./dboptimize.js"; +import { timer } from "./time.js"; let piscina = new Piscina({ filename: resolve("./lib", "fileworker.js"), @@ -13,7 +15,7 @@ let piscina = new Piscina({ const BATCH_SIZE = 1000; // Process files in batches for better performance export default async function getAllFiles(catList) { - var startTime = process.hrtime(); + var proctime = new timer() const url = "https://myrient.erista.me/files/"; let parentRows = await getTableRows({ url: url, base: "" }); let parents = []; @@ -28,7 +30,7 @@ export default async function getAllFiles(catList) { ); } let dirWork = splitFilesAndFolders(parents); - let files = dirWork.files; + // First run should only have directories. Is there a reason this could change in the future? let dirs = dirWork.directories; let fetchTasks = []; let resolvedFetchTasks = []; @@ -141,8 +143,9 @@ export default async function getAllFiles(catList) { var elapsed = parseHrtimeToSeconds(process.hrtime(startTime)); var m = Math.floor(elapsed / 60); var s = Math.floor(elapsed % 60); - console.log(`\nFinished crawling Myrient in ${m}m${s}s.`); + console.log(`\nFinished crawling Myrient in ${proctime.elapsed()}.`); await piscina.close(); + await optimizeDatabaseKws(); return fileCount; } @@ -204,9 +207,4 @@ function singleLineStatus(str) { } else { console.log(str); } -} - -function parseHrtimeToSeconds(hrtime) { - var seconds = (hrtime[0] + hrtime[1] / 1e9).toFixed(3); - return seconds; -} +} \ No newline at end of file diff --git a/lib/fileworker.js b/lib/fileworker.js index d66b30f..d5dde1c 100644 --- a/lib/fileworker.js +++ b/lib/fileworker.js @@ -51,7 +51,7 @@ export async function parseOutFile(data) { path: data.url + path, size: size, category: category, - hidden: `${category.replaceAll(' ', '')} ${cats.subCat.replaceAll(' ', '')}`, + subcategories: `${cats.subCat.replaceAll(' ', '')}`, type: findType(fullName, data.catList), date: innertext(file.querySelector(".date").innerHTML).trim(), region: findRegion(fullName, data.catList), diff --git a/lib/json/relatedkeywords/categories.json b/lib/json/relatedkeywords/categories.json new file mode 100644 index 0000000..196f588 --- /dev/null +++ b/lib/json/relatedkeywords/categories.json @@ -0,0 +1,41 @@ +[ + [ + "supernintendoentertainmentsystem", + "snes", + "super famicom", + "family computer" + ], + ["nintendoentertainmentsystem", "nes", "famicom", "family computer"], + ["playstation", "ps", "play station"], + ["playstation1", "ps1", "playstation 1", "psone", "one"], + ["playstation2", "ps2", "playstation 2"], + ["playstation3", "ps3", "playstation 3"], + ["playstationmobile", "psm", "mobile"], + ["xbox360", "x360", "xbox"], + ["xbox", "xb"], + ["famicomdisksystem", "fds", "famicom disk system"], + ["gameboyadvance", "gba", "gameboy advance", "game boy"], + ["gameboycolor", "gbc", "gameboy color", "game boy"], + ["gameboy", "gb", "gameboy", "game boy"], + ["gamecube", "gc", "game cube", "dolphin"], + ["megadrive", "md", "megadrive", "mega drive", "genesis"], + ["dreamcast", "dc", "dream cast"], + ["playstationvita", "psv", "playstation vita"], + ["playstationnetwork", "psn", "playstation network"], + ["nintendo switch", "switch", "nx"], + ["nintendo 3ds", "3ds", "three ds", "3d dual screen"], + ["nintendo ds", "ds", "dual screen"], + ["nintendo 64", "n64", "ultra 64"], + ["wiiu", "wii u"], + ["atari 2600", "vcs", "video computer system"], + ["playstationvr", "psvr", "playstation vr"], + ["pc engine", "pcengine", "turbografx", "turbografx-16"], + ["neogeo", "neo geo", "mvs"], + ["xboxone", "xone", "xbox one"], + ["xboxseriesx", "xsx", "xbox series x"], + ["xboxseriess", "xss", "xbox series s"], + ["amiibo", "nfc figure"], + ["mastersystem", "ms", "master system"], + ["wii", "revolution"], + ["appple II", "apple 2"] +] diff --git a/lib/json/relatedkeywords/names.json b/lib/json/relatedkeywords/names.json new file mode 100644 index 0000000..920ccf2 --- /dev/null +++ b/lib/json/relatedkeywords/names.json @@ -0,0 +1,14 @@ +[ + ["vmu", "visual memory unit"], + ["cd", "compact disc"], + ["bd", "blu-ray", "blu ray"], + ["hd", "high definition"], + ["pdf", "portable document format"], + ["dlc", "downloadable content"], + ["byteswapped", "byte swapped"], + ["bigendian", "big endian"], + ["littleendian", "little endian"], + ["pc88", "pc-88", "pc 88"], + ["dvd", "digital video disc", "digital versatile disc"], + ["bros", "brothers", "bros."] +] diff --git a/lib/json/relatedkeywords/regions.json b/lib/json/relatedkeywords/regions.json new file mode 100644 index 0000000..4eba671 --- /dev/null +++ b/lib/json/relatedkeywords/regions.json @@ -0,0 +1,4 @@ +[ + ["uk", "united kingdom"], + ["usa", "united states of america"] +] diff --git a/lib/models/file.js b/lib/models/file.js index e5dfef4..abccbaf 100644 --- a/lib/models/file.js +++ b/lib/models/file.js @@ -24,6 +24,21 @@ export default function (sequelize) { type: DataTypes.TEXT, allowNull: false }, + subcategories: { + type: DataTypes.TEXT + }, + filenamekws: { + type: DataTypes.TEXT, + }, + categorykws: { + type: DataTypes.TEXT, + }, + subcategorieskws: { + type: DataTypes.TEXT, + }, + regionkws: { + type: DataTypes.TEXT, + }, type: { type: DataTypes.TEXT }, diff --git a/lib/search.js b/lib/search.js index 5fb86fc..fbdda17 100644 --- a/lib/search.js +++ b/lib/search.js @@ -1,40 +1,39 @@ -import debugPrint from './debugprint.js' -import { search as elasticSearch, getSuggestions as elasticSuggestions } from './services/elasticsearch.js' -import { File } from './models/index.js' +import debugPrint from "./debugprint.js"; +import { + search as elasticSearch, + getSuggestions as elasticSuggestions, +} from "./services/elasticsearch.js"; +import { File } from "./models/index.js"; export default class Searcher { - constructor(fields) { - this.fields = [...fields] - this.indexing = false - } + constructor(fields) { + this.fields = [...fields]; + this.indexing = false; + } - async findAllMatches(query, options) { - try { - return await elasticSearch(query, options) - } catch (err) { - console.error(err) - return { items: [], elapsed: 0 } - } + async findAllMatches(query, options) { + try { + return await elasticSearch(query, options); + } catch (err) { + console.error(err); + return { items: [], elapsed: 0 }; } + } - async getSuggestions(query, options) { - try { - return await elasticSuggestions(query, options) - } catch (err) { - console.error(err) - return [] - } + async getSuggestions(query, options) { + try { + return await elasticSuggestions(query, options); + } catch (err) { + console.error(err); + return []; } + } - findIndex(id) { - return File.findByPk(id) - } + findIndex(id) { + return File.findByPk(id); + } - async getIndexSize() { - return await File.count() - } - - get termCount() { - return 0 // Not applicable with Elasticsearch - } -} \ No newline at end of file + async getIndexSize() { + return await File.count(); + } +} diff --git a/lib/searchalikes.json b/lib/searchalikes.json deleted file mode 100644 index 7b689de..0000000 --- a/lib/searchalikes.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "StringAssoc": [ - ["supernintendoentertainmentsystem", "snes", "super famicom", "family computer"], - ["nintendoentertainmentsystem", "nes", "famicom", "family computer"], - ["playstation", "ps", "play station"], - ["playstation1", "ps1", "playstation 1", "psone", "one"], - ["playstation2", "ps2", "playstation 2"], - ["playstation3", "ps3", "playstation 3"], - ["playstationmobile", "psm", "mobile"], - ["xbox360", "x360", "xbox"], - ["xbox", "xb"], - ["famicomdisksystem", "fds", "famicom disk system"], - ["gameboyadvance", "gba", "gameboy advance"], - ["gameboycolor", "gbc", "gameboy color"], - ["gameboy", "gb", "gameboy"], - ["gamecube", "gc", "game cube", "dolphin"], - ["megadrive", "md", "megadrive", "mega drive"], - ["dreamcast", "dc", "dream case"], - ["playstationvita", "psv", "playstation vita"], - ["playstationnetwork", "psn", "playstation network"], - ["uk", "united kingdom"], - ["usa", "united states of america"], - ["vmu", "visual memory unit"], - ["cd", "compact disc"], - ["bd", "blu-ray", "blu ray"], - ["hd", "high definition"], - ["pdf", "portable document format"], - ["dlc", "downloadable content"], - ["byteswapped", "byte swapped"], - ["bigendian", "big endian"], - ["littleendian", "little endian"], - ["pc88", "pc-88", "pc 88"], - ["dvd", "digital video disc", "digital versatile disc"], - ["mastersystem", "ms", "master system"], - ["wii", "revolution"], - ["bros", "brothers", "bros."], - ["bros.", "brothers", "bros"], - ["playstation4", "ps4", "playstation 4"], - ["playstation5", "ps5", "playstation 5"], - ["playstationportable", "psp", "playstation portable"], - ["nintendoswitch", "switch", "nx"], - ["nintendo3ds", "3ds", "three ds"], - ["nintendods", "ds", "dual screen"], - ["nintendo64", "n64", "ultra 64"], - ["wiiu", "wii u"], - ["atari2600", "vcs", "video computer system"], - ["segasaturn", "saturn"], - ["genesis", "mega drive", "sega genesis"], - ["virtualboy", "vb", "virtual boy"], - ["playstationvr", "psvr", "playstation vr"], - ["pcengine", "pc engine", "turbografx"], - ["neogeo", "neo geo", "mvs"], - ["xboxone", "xone", "xbox one"], - ["xboxseriesx", "xsx", "xbox series x"], - ["xboxseriess", "xss", "xbox series s"], - ["amiibo", "nfc figure"] - ] -} \ No newline at end of file diff --git a/lib/services/elasticsearch.js b/lib/services/elasticsearch.js index 7a4200b..afd6dd8 100644 --- a/lib/services/elasticsearch.js +++ b/lib/services/elasticsearch.js @@ -69,6 +69,18 @@ export async function initElasticsearch() { region: { type: 'text', analyzer: 'standard' + }, + filenamekws: { + type: 'text', + analyzer: 'standard' + }, + categorykws: { + type: 'text', + analyzer: 'standard' + }, + regionkws: { + type: 'text', + analyzer: 'standard' } } } @@ -102,7 +114,10 @@ export async function bulkIndexFiles(files) { filename: file.filename, category: file.category, type: file.type, - region: file.region + region: file.region, + filenamekws: file.filenamekws, + categorykws: file.categorykws, + regionkws: file.regionkws } ]); @@ -128,6 +143,10 @@ export async function bulkIndexFiles(files) { } export async function search(query, options) { + //add kws for selected fields + for(let field in options.fields){ + options.fields.push(options.fields[field] + 'kws') + } const searchQuery = { index: INDEX_NAME, body: { @@ -204,7 +223,7 @@ function buildMustClauses(query, options) { multi_match: { query: term, fields: options.fields.map(field => - field === 'filename' ? `${field}^2` : field + field === 'filename' || 'filenamekws' ? `${field}^2` : field ), fuzziness: options.fuzzy || 0, type: 'best_fields' @@ -224,7 +243,7 @@ function buildShouldClauses(query, options) { multi_match: { query, fields: options.fields.map(field => - field === 'filename' ? `${field}^2` : field + field === 'filename' || 'filenamekws' ? `${field}^2` : field ), fuzziness: options.fuzzy || 0, type: 'best_fields' @@ -247,7 +266,7 @@ export async function getSuggestions(query, options) { query: { multi_match: { query, - fields: ['filename^2', 'category'], + fields: ['filename^2', 'filenamekws^2', 'category', 'categorykws'], fuzziness: 'AUTO', type: 'best_fields' } @@ -264,4 +283,28 @@ export async function getSuggestions(query, options) { console.error('Suggestion error:', error); return []; } +} + +export async function getSample(query, options){ + try { + const response = await client.search({ + index: INDEX_NAME, + body: { + query: { + match: { + filename: query, + } + }, + _source: ['filename'], + size: 30 + } + }); + + return response.hits.hits.map(hit => ({ + sample: hit._source.filename + })); + } catch (error) { + console.error('Sample error:', error); + return []; + } } \ No newline at end of file diff --git a/lib/time.js b/lib/time.js new file mode 100644 index 0000000..501fa0b --- /dev/null +++ b/lib/time.js @@ -0,0 +1,16 @@ +export class timer { + constructor() { + this.startTime = process.hrtime(); + } + parseHrtimetoSeconds(hrtime) { + var seconds = (hrtime[0] + hrtime[1] / 1e9).toFixed(3); + return seconds; + } + elapsed() { + let elapsed = this.parseHrtimetoSeconds(process.hrtime(this.startTime)); + let h = Math.floor(elapsed / 3600); + let m = Math.floor(elapsed / 60); + let s = Math.floor(elapsed % 60); + return `${h ? h + "h" : ""}${m ? m + "m" : ""}${s + "s"}`; + } +} diff --git a/package-lock.json b/package-lock.json index 8c85953..70bdd6a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,6 +26,7 @@ "sanitize": "^2.1.2", "sequelize": "^6.37.1", "sequelize-cli": "^6.6.2", + "to-words": "^4.5.1", "uuid": "^11.1.0" } }, @@ -3576,6 +3577,15 @@ "integrity": "sha512-bu9oCYYWC1iRjx+3UnAjqCsfrWNZV1ghNQf49b3w5xE8J/tNShHTzp5syWJfwGH+pxUgTTLUnzHnfuydW7wmbg==", "license": "MIT" }, + "node_modules/to-words": { + "version": "4.5.1", + "resolved": "https://registry.npmjs.org/to-words/-/to-words-4.5.1.tgz", + "integrity": "sha512-/Yp5UX72RzSaOk+KvUglc/uXgIVjWN3WKqeTouz/izWNkCDHjo1Tmhz9UA7VUtNMUBvkyt59NUGfjL5FBEEDqg==", + "license": "MIT", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -3656,9 +3666,9 @@ "license": "MIT" }, "node_modules/undici": { - "version": "6.21.1", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.1.tgz", - "integrity": "sha512-q/1rj5D0/zayJB2FraXdaWxbhWiNKDvu8naDT2dl1yTlvJp4BLtOcp2a5BvgGNQpYYJzau7tf1WgKv3b+7mqpQ==", + "version": "6.21.3", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", + "integrity": "sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==", "license": "MIT", "engines": { "node": ">=18.17" diff --git a/package.json b/package.json index b731828..8ed2d21 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "sanitize": "^2.1.2", "sequelize": "^6.37.1", "sequelize-cli": "^6.6.2", + "to-words": "^4.5.1", "uuid": "^11.1.0" }, "type": "module" diff --git a/server.js b/server.js index cb4999e..248d594 100644 --- a/server.js +++ b/server.js @@ -8,21 +8,24 @@ import http from "http"; import sanitize from "sanitize"; import debugPrint from "./lib/debugprint.js"; import compression from "compression"; -import cookieParser from 'cookie-parser'; -import { generateAsciiArt } from './lib/asciiart.js'; -import { getEmulatorConfig, isEmulatorCompatible, isNonGameContent } from './lib/emulatorConfig.js'; -import fetch from 'node-fetch'; -import { initDB, File, QueryCount } from './lib/database.js'; -import { initElasticsearch } from './lib/services/elasticsearch.js'; -import i18n, { locales } from './config/i18n.js'; -import { v4 as uuidv4 } from 'uuid'; +import cookieParser from "cookie-parser"; +import { generateAsciiArt } from "./lib/asciiart.js"; +import { + getEmulatorConfig, + isEmulatorCompatible, + isNonGameContent, +} from "./lib/emulatorConfig.js"; +import fetch from "node-fetch"; +import { initDB, File, QueryCount } from "./lib/database.js"; +import { initElasticsearch } from "./lib/services/elasticsearch.js"; +import i18n, { locales } from "./config/i18n.js"; +import { v4 as uuidv4 } from "uuid"; +import { optimizeDatabaseKws } from "./lib/dboptimize.js"; -let categoryListPath = "./lib/categories.json" -let searchAlikesPath = './lib/searchalikes.json' -let nonGameTermsPath = './lib/nonGameTerms.json' -let emulatorsPath = './lib/emulators.json' +let categoryListPath = "./lib/categories.json"; +let nonGameTermsPath = "./lib/nonGameTerms.json"; +let emulatorsPath = "./lib/emulators.json"; let categoryList = await FileHandler.parseJsonFile(categoryListPath); -global.searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath) let nonGameTerms = await FileHandler.parseJsonFile(nonGameTermsPath); let emulatorsData = await FileHandler.parseJsonFile(emulatorsPath); let crawlTime = 0; @@ -36,7 +39,7 @@ await initElasticsearch(); // Get initial counts fileCount = await File.count(); -crawlTime = (await File.max('updatedAt'))?.getTime() || 0; +crawlTime = (await File.max("updatedAt"))?.getTime() || 0; queryCount = (await QueryCount.findOne())?.count || 0; let searchFields = ["filename", "category", "type", "region"]; @@ -65,7 +68,7 @@ let search = new Searcher(searchFields); async function getFilesJob() { console.log("Updating the file list."); fileCount = await getAllFiles(categoryList); - if(!fileCount) { + if (!fileCount) { console.log("File update failed"); return; } @@ -81,14 +84,13 @@ let defaultOptions = { crawlTime: crawlTime, queryCount: queryCount, fileCount: fileCount, - termCount: 0, generateAsciiArt: generateAsciiArt, isEmulatorCompatible: isEmulatorCompatible, isNonGameContent: isNonGameContent, - nonGameTerms: nonGameTerms + nonGameTerms: nonGameTerms, }; -function updateDefaults(){ +function updateDefaults() { defaultOptions.crawlTime = crawlTime; defaultOptions.queryCount = queryCount; defaultOptions.fileCount = fileCount; @@ -98,15 +100,15 @@ let app = express(); let server = http.createServer(app); app.use((req, res, next) => { - res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); - res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp'); + res.setHeader("Cross-Origin-Opener-Policy", "same-origin"); + res.setHeader("Cross-Origin-Embedder-Policy", "require-corp"); next(); }); app.use(sanitize.middleware); -app.use(compression()) -app.use(express.json()) -app.use(cookieParser()) +app.use(compression()); +app.use(express.json()); +app.use(cookieParser()); app.set("view engine", "ejs"); app.use((req, res, next) => { @@ -137,20 +139,20 @@ app.use((req, res, next) => { // Fallback to English if (!lang) { - lang = 'en'; + lang = "en"; } req.setLocale(lang); res.locals.locale = lang; res.locals.availableLocales = locales; - res.cookie('lang', lang, { maxAge: 365 * 24 * 60 * 60 * 1000 }); // 1 year + res.cookie("lang", lang, { maxAge: 365 * 24 * 60 * 60 * 1000 }); // 1 year next(); }); // Add helper function to all templates -app.locals.__ = function() { +app.locals.__ = function () { return i18n.__.apply(this, arguments); }; @@ -161,9 +163,9 @@ app.get("/", function (req, res) { app.get("/search", async function (req, res) { let query = req.query.q ? req.query.q : ""; - let pageNum = parseInt(req.query.p) - let urlPrefix = encodeURI(`/search?s=${req.query.s}&q=${req.query.q}&p=`) - pageNum = pageNum ? pageNum : 1 + let pageNum = parseInt(req.query.p); + let urlPrefix = encodeURI(`/search?s=${req.query.s}&q=${req.query.q}&p=`); + pageNum = pageNum ? pageNum : 1; let settings = {}; try { settings = req.query.s ? JSON.parse(atob(req.query.s)) : defaultSettings; @@ -189,13 +191,10 @@ app.get("/search", async function (req, res) { } let results = await search.findAllMatches(query, settings); debugPrint(results); - if(results.items.length && pageNum == 1){ + if (results.items.length && pageNum == 1) { queryCount += 1; - await QueryCount.update( - { count: queryCount }, - { where: { id: 1 } } - ); - updateDefaults() + await QueryCount.update({ count: queryCount }, { where: { id: 1 } }); + updateDefaults(); } let options = { query: query, @@ -203,7 +202,7 @@ app.get("/search", async function (req, res) { pageNum: pageNum, indexing: search.indexing, urlPrefix: urlPrefix, - settings: settings + settings: settings, }; let page = "results"; options = buildOptions(page, options); @@ -213,7 +212,9 @@ app.get("/search", async function (req, res) { app.get("/lucky", async function (req, res) { let results = { items: [] }; if (req.query.q) { - let settings = req.query.s ? JSON.parse(atob(req.query.s)) : defaultSettings; + let settings = req.query.s + ? JSON.parse(atob(req.query.s)) + : defaultSettings; results = await search.findAllMatches(req.query.q, settings); debugPrint(results); } @@ -223,17 +224,14 @@ app.get("/lucky", async function (req, res) { const count = await File.count(); const randomId = Math.floor(Math.random() * count); const luckyFile = await File.findOne({ - offset: randomId + offset: randomId, }); debugPrint(`${randomId}: ${luckyFile?.path}`); - res.redirect(luckyFile?.path || '/'); + res.redirect(luckyFile?.path || "/"); } queryCount += 1; - await QueryCount.update( - { count: queryCount }, - { where: { id: 1 } } - ); - updateDefaults() + await QueryCount.update({ count: queryCount }, { where: { id: 1 } }); + updateDefaults(); }); app.get("/settings", function (req, res) { @@ -243,18 +241,21 @@ app.get("/settings", function (req, res) { res.render(indexPage, options); }); -app.post("/suggest", async function(req, res){ - if(!req.body){ - return +app.post("/suggest", async function (req, res) { + if (!req.body) { + return; } - if(typeof req.body.query == 'undefined'){ - return + if (typeof req.body.query == "undefined") { + return; } - let suggestions = await search.getSuggestions(req.body.query, defaultSettings) - debugPrint(suggestions) - res.setHeader('Content-Type', 'application/json'); + let suggestions = await search.getSuggestions( + req.body.query, + defaultSettings + ); + debugPrint(suggestions); + res.setHeader("Content-Type", "application/json"); res.end(JSON.stringify({ suggestions })); -}) +}); app.get("/about", function (req, res) { let page = "about"; @@ -263,8 +264,8 @@ app.get("/about", function (req, res) { app.get("/play/:id", async function (req, res) { // Block access if emulator is disabled - if (process.env.EMULATOR_ENABLED !== 'true') { - res.redirect('/'); + if (process.env.EMULATOR_ENABLED !== "true") { + res.redirect("/"); return; } @@ -272,14 +273,14 @@ app.get("/play/:id", async function (req, res) { let romFile = await search.findIndex(fileId); if (!romFile) { - res.redirect('/'); + res.redirect("/"); return; } let options = { romFile: romFile, emulatorConfig: getEmulatorConfig(romFile.category), - isNonGame: isNonGameContent(romFile.filename, nonGameTerms) + isNonGame: isNonGameContent(romFile.filename, nonGameTerms), }; let page = "emulator"; @@ -289,48 +290,51 @@ app.get("/play/:id", async function (req, res) { app.get("/proxy-rom/:id", async function (req, res, next) { // Block access if emulator is disabled - if (process.env.EMULATOR_ENABLED !== 'true') { - return next(new Error('Emulator feature is disabled')); + if (process.env.EMULATOR_ENABLED !== "true") { + return next(new Error("Emulator feature is disabled")); } let fileId = parseInt(req.params.id); let romFile = await search.findIndex(fileId); if (!romFile) { - return next(new Error('ROM not found')); + return next(new Error("ROM not found")); } try { const response = await fetch(romFile.path); - const contentLength = response.headers.get('content-length'); + const contentLength = response.headers.get("content-length"); - res.setHeader('Content-Type', 'application/zip'); - res.setHeader('Content-Length', contentLength); - res.setHeader('Content-Disposition', `attachment; filename="${romFile.filename}"`); + res.setHeader("Content-Type", "application/zip"); + res.setHeader("Content-Length", contentLength); + res.setHeader( + "Content-Disposition", + `attachment; filename="${romFile.filename}"` + ); // Add all required cross-origin headers - res.setHeader('Cross-Origin-Resource-Policy', 'same-origin'); - res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp'); - res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); + res.setHeader("Cross-Origin-Resource-Policy", "same-origin"); + res.setHeader("Cross-Origin-Embedder-Policy", "require-corp"); + res.setHeader("Cross-Origin-Opener-Policy", "same-origin"); response.body.pipe(res); } catch (error) { - console.error('Error proxying ROM:', error); + console.error("Error proxying ROM:", error); next(error); } }); app.get("/proxy-bios", async function (req, res, next) { // Block access if emulator is disabled - if (process.env.EMULATOR_ENABLED !== 'true') { - return next(new Error('Emulator feature is disabled')); + if (process.env.EMULATOR_ENABLED !== "true") { + return next(new Error("Emulator feature is disabled")); } const biosUrl = req.query.url; // Validate that URL is from GitHub - if (!biosUrl || !biosUrl.startsWith('https://github.com')) { - return next(new Error('Invalid BIOS URL - only GitHub URLs are allowed')); + if (!biosUrl || !biosUrl.startsWith("https://github.com")) { + return next(new Error("Invalid BIOS URL - only GitHub URLs are allowed")); } try { @@ -340,37 +344,38 @@ app.get("/proxy-bios", async function (req, res, next) { throw new Error(`HTTP error! status: ${response.status}`); } - const contentLength = response.headers.get('content-length'); - const contentType = response.headers.get('content-type'); + const contentLength = response.headers.get("content-length"); + const contentType = response.headers.get("content-type"); - res.setHeader('Content-Type', contentType || 'application/octet-stream'); - res.setHeader('Content-Length', contentLength); + res.setHeader("Content-Type", contentType || "application/octet-stream"); + res.setHeader("Content-Length", contentLength); // Add all required cross-origin headers - res.setHeader('Cross-Origin-Resource-Policy', 'same-origin'); - res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp'); - res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); + res.setHeader("Cross-Origin-Resource-Policy", "same-origin"); + res.setHeader("Cross-Origin-Embedder-Policy", "require-corp"); + res.setHeader("Cross-Origin-Opener-Policy", "same-origin"); response.body.pipe(res); } catch (error) { - console.error('Error proxying BIOS:', error); + console.error("Error proxying BIOS:", error); next(error); } }); // Proxy route for EmulatorJS content -app.get('/emulatorjs/*', async function (req, res, next) { +app.get("/emulatorjs/*", async function (req, res, next) { try { // Extract the path after /emulatorjs/ - const filePath = req.path.replace(/^\/emulatorjs\//, ''); + const filePath = req.path.replace(/^\/emulatorjs\//, ""); // Support both stable and latest paths const emulatorJsUrl = `https://cdn.emulatorjs.org/stable/${filePath}`; const response = await fetch(emulatorJsUrl, { headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36' - } + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", + }, }); if (!response.ok) { @@ -378,33 +383,33 @@ app.get('/emulatorjs/*', async function (req, res, next) { } // Copy content type and length - const contentType = response.headers.get('content-type'); + const contentType = response.headers.get("content-type"); if (contentType) { - res.setHeader('Content-Type', contentType); + res.setHeader("Content-Type", contentType); } - const contentLength = response.headers.get('content-length'); + const contentLength = response.headers.get("content-length"); if (contentLength) { - res.setHeader('Content-Length', contentLength); + res.setHeader("Content-Length", contentLength); } // Set special headers for WASM files - if (filePath.endsWith('.wasm')) { - res.setHeader('Content-Type', 'application/wasm'); + if (filePath.endsWith(".wasm")) { + res.setHeader("Content-Type", "application/wasm"); } // Special handling for JavaScript files - if (filePath.endsWith('.js')) { - res.setHeader('Content-Type', 'application/javascript'); + if (filePath.endsWith(".js")) { + res.setHeader("Content-Type", "application/javascript"); } - res.setHeader('Cross-Origin-Resource-Policy', 'same-origin'); - res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp'); - res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); + res.setHeader("Cross-Origin-Resource-Policy", "same-origin"); + res.setHeader("Cross-Origin-Embedder-Policy", "require-corp"); + res.setHeader("Cross-Origin-Opener-Policy", "same-origin"); response.body.pipe(res); } catch (error) { - console.error('Error proxying EmulatorJS content:', error); + console.error("Error proxying EmulatorJS content:", error); next(error); } }); @@ -423,14 +428,15 @@ app.get("/proxy-image", async function (req, res, next) { const imageUrl = req.query.url; if (!imageUrl) { - return next(new Error('No image URL provided')); + return next(new Error("No image URL provided")); } try { const response = await fetch(imageUrl, { headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36' - } + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", + }, }); if (!response.ok) { @@ -438,26 +444,26 @@ app.get("/proxy-image", async function (req, res, next) { } // Copy content type - const contentType = response.headers.get('content-type'); + const contentType = response.headers.get("content-type"); if (contentType) { - res.setHeader('Content-Type', contentType); + res.setHeader("Content-Type", contentType); } - const contentLength = response.headers.get('content-length'); + const contentLength = response.headers.get("content-length"); if (contentLength) { - res.setHeader('Content-Length', contentLength); + res.setHeader("Content-Length", contentLength); } response.body.pipe(res); } catch (error) { - console.error('Error proxying image:', error); + console.error("Error proxying image:", error); next(error); } }); // 404 handler app.use((req, res, next) => { - const err = new Error('Page Not Found'); + const err = new Error("Page Not Found"); err.status = 404; next(err); }); @@ -465,20 +471,20 @@ app.use((req, res, next) => { // Error handling middleware app.use((err, req, res, next) => { const status = err.status || 500; - const message = err.message || 'An unexpected error occurred'; + const message = err.message || "An unexpected error occurred"; - if (process.env.NODE_ENV !== 'production') { + if (process.env.NODE_ENV !== "production") { console.error(err); } res.status(status); - res.render('pages/error', { + res.render("pages/error", { status, message, - stack: process.env.NODE_ENV !== 'production' ? err.stack : null, + stack: process.env.NODE_ENV !== "production" ? err.stack : null, req, - requestId: req.requestId + requestId: req.requestId, }); }); @@ -502,3 +508,4 @@ if ( } cron.schedule("0 30 2 * * *", getFilesJob); +optimizeDatabaseKws(); diff --git a/views/partials/footer.ejs b/views/partials/footer.ejs index 6268660..8e39a33 100644 --- a/views/partials/footer.ejs +++ b/views/partials/footer.ejs @@ -4,8 +4,6 @@