Use PostgreSQL and Elasticsearch instead of file-based storage

2025-01-28 20:14:19 -03:00
parent 8c1910827b
commit 901fa09f61
15 changed files with 2326 additions and 289 deletions

.env

@@ -13,3 +13,14 @@ MAX_FETCH_JOBS=1000
INSTANCE_NAME=Myrient
# Enable the built-in emulator
EMULATOR_ENABLED=true
# Run docker-compose.dev.yml when developing locally
# Database Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=myrient
POSTGRES_USER=postgres
POSTGRES_PASSWORD=development
# Elasticsearch Configuration
ELASTICSEARCH_URL=http://localhost:9200

docker-compose.dev.yml Normal file

@@ -0,0 +1,31 @@
# Run this for development
services:
elasticsearch:
image: elasticsearch:8.12.2
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- "ES_JAVA_OPTS=-Xms256m -Xmx256m"
deploy:
resources:
limits:
memory: 512M
ports:
- "9200:9200"
volumes:
- elasticsearch_data:/usr/share/elasticsearch/data
postgres:
image: postgres:16-alpine
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: development
POSTGRES_DB: myrient
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
volumes:
elasticsearch_data:
postgres_data:
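
A quick way to verify the dev stack is reachable before starting the app (a minimal sketch, assuming the services were started with "docker compose -f docker-compose.dev.yml up -d" and the default ports and credentials above):

// Connectivity check against the dev stack defined above.
import { Client } from '@elastic/elasticsearch';
import { Sequelize } from 'sequelize';

const es = new Client({ node: 'http://localhost:9200' });
const info = await es.info();
console.log(`elasticsearch ok (${info.version.number})`);

const db = new Sequelize('myrient', 'postgres', 'development', {
  host: 'localhost',
  port: 5432,
  dialect: 'postgres',
  logging: false,
});
await db.authenticate(); // throws if Postgres is unreachable
console.log('postgres ok');
await db.close();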

docker-compose.yml

@@ -1,5 +1,3 @@
version: '3.8'
services:
myrient-search:
image: ghcr.io/alexankitty/myrient-search-engine:latest
@@ -15,6 +13,43 @@ services:
- MAX_FETCH_JOBS=1000
- INSTANCE_NAME=Myrient
- EMULATOR_ENABLED=true
- POSTGRES_HOST=postgres
- POSTGRES_PORT=5432
- POSTGRES_DB=myrient
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=this-is-a-secure-db-password
- ELASTICSEARCH_URL=http://elasticsearch:9200
volumes:
- ./data:/usr/src/app/data
restart: unless-stopped
depends_on:
- postgres
- elasticsearch
restart: unless-stopped
elasticsearch:
image: elasticsearch:8.17.1
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- "ES_JAVA_OPTS=-Xms256m -Xmx256m"
deploy: # Remove if you have a lot of free memory
resources:
limits:
memory: 512M
volumes:
- elasticsearch_data:/usr/share/elasticsearch/data
restart: unless-stopped
postgres:
image: postgres:alpine
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: this-is-a-secure-db-password
POSTGRES_DB: myrient
volumes:
- postgres_data:/var/lib/postgresql/data
restart: unless-stopped
volumes:
elasticsearch_data:
postgres_data:

lib/database.js Normal file

@@ -0,0 +1,76 @@
import { Sequelize } from 'sequelize';
import 'dotenv/config';
// Import models
import defineFile from './models/file.js';
import defineQueryCount from './models/queryCount.js';
const sequelize = new Sequelize(process.env.POSTGRES_DB, process.env.POSTGRES_USER, process.env.POSTGRES_PASSWORD, {
host: process.env.POSTGRES_HOST || 'localhost',
port: process.env.POSTGRES_PORT || 5432,
dialect: 'postgres',
logging: process.env.DEBUG === '1' ? console.log : false
});
// Initialize models
export const File = defineFile(sequelize);
export const QueryCount = defineQueryCount(sequelize);
export async function initDB() {
try {
// First try to connect to postgres directly to create database if needed
const rootSequelize = new Sequelize('postgres', process.env.POSTGRES_USER, process.env.POSTGRES_PASSWORD, {
host: process.env.POSTGRES_HOST || 'localhost',
port: process.env.POSTGRES_PORT || 5432,
dialect: 'postgres',
logging: false
});
try {
// Try to create database if it doesn't exist
await rootSequelize.query(`CREATE DATABASE ${process.env.POSTGRES_DB};`);
console.log('Database did not exist, created.');
} catch (err) {
// Ignore error if database already exists
if (!err.message.includes('already exists')) {
throw err;
}
} finally {
await rootSequelize.close();
}
// Now connect to the actual database
await sequelize.authenticate();
console.log('DB connected.');
// Get current database schema
const queryInterface = sequelize.getQueryInterface();
const tables = await queryInterface.showAllTables();
if (!tables.includes('Files') || !tables.includes('QueryCounts')) {
// If tables don't exist, create them
console.log('Tables don\'t exist, creating initial database schema...');
await sequelize.sync();
console.log('Database schema created.');
// Initialize QueryCount if it's a new installation
await QueryCount.create({ count: 0 });
} else {
// Auto-migrate existing schema
console.log('Checking for DB migrations...');
await sequelize.sync({ alter: true });
console.log('DB migrations completed.');
}
// Only force sync if explicitly requested
if (process.env.FORCE_FILE_REBUILD === '1') {
await sequelize.sync({ force: true });
console.log('DB forcefully synchronized.');
}
} catch (error) {
console.error('Unable to connect to the DB:', error);
process.exit(1);
}
}
export default sequelize;
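
A minimal sketch of how this module is meant to be consumed (mirroring the server.js changes later in this commit): call initDB once at startup, then use the exported models directly.

import { initDB, File, QueryCount } from './lib/database.js';

await initDB(); // creates the database and tables on first run, auto-migrates otherwise

const fileCount = await File.count();
const counter = await QueryCount.findOne();
console.log(`${fileCount} files stored, ${counter?.count ?? 0} queries served`);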

lib/dircrawl.js

@@ -2,12 +2,16 @@ import { getTableRows, parseOutFile } from "./fileworker.js";
import { Piscina, FixedQueue } from "piscina";
import { resolve } from "path";
import debugPrint from "./debugprint.js";
import { File } from './models/index.js';
import { bulkIndexFiles } from './services/elasticsearch.js';
let piscina = new Piscina({
filename: resolve("./lib", "fileworker.js"),
taskQueue: new FixedQueue(),
});
const BATCH_SIZE = 1000; // Process files in batches for better performance
export default async function getAllFiles(catList) {
var startTime = process.hrtime();
const url = "https://myrient.erista.me/files/";
@@ -29,6 +33,9 @@ export default async function getAllFiles(catList) {
let fetchTasks = [];
let resolvedFetchTasks = [];
let parseTasks = [];
let fileCount = 0;
let currentBatch = [];
while (
dirs.length > 0 ||
fetchTasks.length > 0 ||
@@ -38,7 +45,6 @@ export default async function getAllFiles(catList) {
let dirStatus = "";
if (dirs.length > 0) {
debugPrint(`Queueing: ${dirs[0].name}`);
//add tasks
fetchTasks.push(
piscina
.run(
@@ -51,7 +57,7 @@ export default async function getAllFiles(catList) {
);
dirs.shift();
}
//push completed fetch tasks to parse
if (
fetchTasks.length >= parseInt(process.env.MAX_FETCH_JOBS) ||
((fetchTasks.length > 0 || resolvedFetchTasks.length > 0) &&
@@ -61,8 +67,7 @@ export default async function getAllFiles(catList) {
let settledTasks = await Promise.all(fetchTasks);
resolvedFetchTasks.push(...settledTasks);
while (resolvedFetchTasks.length > 0) {
if (piscina.queueSize >=parseInt(process.env.MAX_JOB_QUEUE)) {
//jump out if we have a ton of tasks scheduled.
if (piscina.queueSize >= parseInt(process.env.MAX_JOB_QUEUE)) {
break;
}
let completedTask = resolvedFetchTasks[0];
@@ -86,25 +91,38 @@ export default async function getAllFiles(catList) {
resolvedFetchTasks.shift();
}
fetchTasks = []; //purge
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${files.length}`;
fetchTasks = [];
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`;
}
//resolve parse tasks to go through fetch tasks
if (dirs.length == 0 && parseTasks.length > 0) {
if (process.env.DEBUG == "1") {
console.log(`Resolving ${parseTasks.length} parse tasks.`);
}
debugPrint(`Resolving ${parseTasks.length} parse tasks.`);
let settledTasks = await Promise.all(parseTasks);
let working = splitFilesAndFolders(settledTasks);
if (working.files.length > 0) {
files.push(...working.files);
// Process files in smaller chunks to avoid stack overflow
for (let i = 0; i < working.files.length; i++) {
currentBatch.push(working.files[i]);
if (currentBatch.length >= BATCH_SIZE) {
await processBatch(currentBatch);
fileCount += currentBatch.length;
currentBatch = [];
}
}
}
if (working.directories.length > 0) {
dirs.push(...working.directories);
// Process directories in chunks to avoid stack overflow
for (let i = 0; i < working.directories.length; i++) {
dirs.push(working.directories[i]);
}
}
parseTasks = []; //purge
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${files.length}`;
parseTasks = [];
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`;
}
if (dirStatus) {
if (process.env.DEBUG == "1") {
console.log(dirStatus);
@@ -113,18 +131,51 @@ export default async function getAllFiles(catList) {
}
}
}
//add IDs after and strip full file name
let id = 0;
for (let file in files) {
files[file].id = id++;
delete files[file].name;
// Process any remaining files in the last batch
if (currentBatch.length > 0) {
await processBatch(currentBatch);
fileCount += currentBatch.length;
}
var elapsed = parseHrtimeToSeconds(process.hrtime(startTime));
var m = Math.floor(elapsed / 60);
var s = Math.floor(elapsed % 60);
console.log(`\nFinished crawling Myrient in ${m}m${s}s.`);
await piscina.close();
return files;
return fileCount;
}
async function processBatch(files) {
try {
// Process in small chunks to avoid memory issues
const chunkSize = 1000;
for (let i = 0; i < files.length; i += chunkSize) {
const chunk = files.slice(i, i + chunkSize);
const dbFiles = await File.bulkCreate(
chunk.map(file => ({
filename: file.filename,
path: file.path,
size: file.size,
category: file.category,
type: file.type,
date: file.date,
region: file.region,
group: file.group
})),
{
returning: true,
updateOnDuplicate: ['path']
}
);
// Index chunk in Elasticsearch
await bulkIndexFiles(dbFiles);
debugPrint(`Processed ${i + chunk.length} of ${files.length} files in current batch`);
}
} catch (error) {
console.error('Error processing batch:', error);
}
}
function splitFilesAndFolders(dirArray) {

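The updateOnDuplicate option in processBatch above turns bulkCreate into an upsert-style insert (Sequelize supports this on PostgreSQL via INSERT ... ON CONFLICT), intended to keep re-crawls from failing on rows whose unique path already exists. A minimal sketch of the call in isolation, with made-up row values:

import { File } from './models/index.js';

// Hypothetical row; the crawler assembles these from parsed directory listings.
const rows = [{
  filename: 'Example Game (USA).zip',
  path: '/files/example.zip',
  size: '1.2 MiB',
  category: 'Example Category',
}];

const saved = await File.bulkCreate(rows, {
  returning: true,             // hand the persisted rows (with ids) to the Elasticsearch indexer
  updateOnDuplicate: ['path'], // on conflict, only path is rewritten, leaving existing rows effectively unchanged
});
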
lib/models/file.js Normal file

@@ -0,0 +1,49 @@
import { DataTypes } from 'sequelize';
export default function (sequelize) {
const File = sequelize.define('File', {
id: {
type: DataTypes.INTEGER,
primaryKey: true,
autoIncrement: true
},
filename: {
type: DataTypes.STRING,
allowNull: false
},
path: {
type: DataTypes.TEXT,
allowNull: false,
unique: true
},
size: {
type: DataTypes.TEXT,
allowNull: false
},
category: {
type: DataTypes.TEXT,
allowNull: false
},
type: {
type: DataTypes.TEXT
},
date: {
type: DataTypes.TEXT
},
region: {
type: DataTypes.TEXT
},
group: {
type: DataTypes.TEXT
}
}, {
indexes: [
{ fields: ['filename'] },
{ fields: ['category'] },
{ fields: ['type'] },
{ fields: ['region'] }
]
});
return File;
}
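
The secondary indexes declared above back the equality lookups and sorts the rest of the commit performs; a minimal query sketch (the category and region values are made up):

import { File } from './models/index.js';

const hits = await File.findAll({
  where: { category: 'Example Category', region: 'USA' }, // both columns are indexed above
  order: [['filename', 'ASC']],                           // filename is indexed as well
  limit: 25,
});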

lib/models/index.js Normal file

@@ -0,0 +1 @@
export { File, QueryCount } from '../database.js';

lib/models/queryCount.js Normal file

@@ -0,0 +1,18 @@
import { DataTypes } from 'sequelize';
export default function (sequelize) {
const QueryCount = sequelize.define('QueryCount', {
id: {
type: DataTypes.INTEGER,
primaryKey: true,
autoIncrement: true
},
count: {
type: DataTypes.INTEGER,
defaultValue: 0,
allowNull: false
}
});
return QueryCount;
}

(JSON term list; file name not shown)

@@ -51,6 +51,7 @@
"update",
"utility",
"video",
"Virtual Console",
"wallpaper"
]
}

lib/search.js

@@ -1,196 +1,40 @@
import MiniSearch from 'minisearch'
import debugPrint from './debugprint.js'
import { search as elasticSearch, getSuggestions as elasticSuggestions } from './services/elasticsearch.js'
import { File } from './models/index.js'
export default class Searcher{
constructor(fields, stringGroups){
this.distance = parseFloat(process.env.FUZZY_DISTANCE)
this.minMatch = parseFloat(process.env.MIN_MATCH)
this.indexing = false
this.stringGroups = stringGroups
export default class Searcher {
constructor(fields) {
this.fields = [...fields]
}
termProcessor(term){
term = term.toLowerCase()
let stringArray = [term]
stringArray.push(...Searcher.stringBreakout(term))
for(let group in searchAlikes.StringAssoc){
let currentGroup = searchAlikes.StringAssoc[group]
let leadString = currentGroup[0]
if(term == leadString){
for(let index in currentGroup){
let currentString = currentGroup[index]
stringArray.push(...Searcher.stringBreakout(currentString))
}
}
}
return [...new Set(stringArray)]
}
static stringBreakout(string){
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*/g
let array = [string]
let workingString = ''
array.push(string.replaceAll(symbolRegex, ''))
array.push(...string.split(' '))
workingString = string.replaceAll(symbolRegex, ' ')
array.push(...workingString.split(' '))
return [...new Set(array)]
}
stringToWordArray(string){
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*/g
let workingString = string.replaceAll(symbolRegex, ' ')
let stringArray = workingString.split(' ')
return stringArray.filter(entry => entry.trim() != '');
}
async findAllMatches(query, options){
try{
let optionsValue = structuredClone(options)
var startTime = process.hrtime();
optionsValue.fields.push('hidden')
debugPrint(options)
let results = this.miniSearch.search(query, optionsValue)
var elapsed = this.parseHrtimeToSeconds(process.hrtime(startTime));
return {
items: results,
elapsed: elapsed
}
}
catch(err){
console.error(err)
}
}
async createIndex(fileArr){
if(!this.miniSearch){
this.miniSearch = new MiniSearch({
fields: [...this.fields, 'hidden'],
storeFields: ['filename', 'category', 'type', 'date', 'size', 'region', 'path', 'id', 'group'],
processTerm: this.termProcessor
})
}
else{
this.miniSearch.removeAll()
}
this.indexing = true
this.miniSearch.addAll(fileArr)
console.log('File list indexing completed.')
console.log(`Total terms in index: ${this.miniSearch.termCount}`)
this.indexing = false
}
async updateIndex(fileArr){
let fields = [...this.fields]
fields.push('id')
console.log('Performing Index Update.')
for(let x = 0; x < fileArr.length; x++){
let searchIndex = this.findIndex(x)
if(!searchIndex){
//add if it doesn't exist in the index
debugPrint(`Adding index ${x}`)
this.miniSearch.add(fileArr[x])
continue
}
let changed = false
for(let field in fields){
let fieldName = fields[field]
let searchField = searchIndex[fieldName]
let fileField = fileArr[x][fieldName]
debugPrint(`${fieldName}: ${searchField} ${fileField}`)
if(searchField == fileField){
changed = true
}
}
if(changed){
debugPrint(`Updating Index ${x}`)
this.miniSearch.replace(fileArr[x])
}
async findAllMatches(query, options) {
try {
return await elasticSearch(query, options)
} catch (err) {
console.error(err)
return { items: [], elapsed: 0 }
}
let indexSize = this.getIndexSize()
if(indexSize > fileArr.length){
debugPrint(`Removing indices ${fileArr.length}-${indexSize}.`)
//clean up indices that are no longer relevant
for(let x = fileArr.length; x < indexSize; x++){
this.miniSearch.discard(x)
}
}
async getSuggestions(query, options) {
try {
return await elasticSuggestions(query, options)
} catch (err) {
console.error(err)
return []
}
console.log(`Completed index update. New Term Count: ${this.miniSearch.termCount}`)
}
parseHrtimeToSeconds(hrtime){
var seconds = (hrtime[0] + (hrtime[1] / 1e9)).toFixed(3);
return seconds;
findIndex(id) {
return File.findByPk(id)
}
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
async getIndexSize() {
return await File.count()
}
findIndex(id){
//this might be a hack
return this.miniSearch._storedFields.get(id)
}
getIndexSize(){
return this.miniSearch._storedFields.size
}
async getSuggestions(query, options){
query = query.toLowerCase()
options.fields = ['filename', 'category'] //reduce field search
let matches = await this.findAllMatches(query, options)
let results = matches.items
let suggestions = []
for(let result = 0; result < results.length; result++){
let currentResult = results[result]
let fileString = String(currentResult.filename).toLowerCase()
let categoryString = String(currentResult.category).toLowerCase()
let fileSplit = fileString.split(query)
let categorySplit = categoryString.split(query)
if(fileSplit.length > 1){
let wordSplit = this.stringToWordArray(fileSplit[1])
let prediction = ''
let prefixMatch = String(fileSplit[1]).substring(0,1) != ' '
let prefixSpace = prefixMatch ? '' : ' '
if(wordSplit.length > 1){
prediction = `${prefixSpace}${wordSplit[0]} ${wordSplit[1]}`
}
else if (wordSplit.length == 1){
prediction = `${prefixSpace}${wordSplit[0]}`
}
else {
//bad result discard
continue
}
suggestions.push(`${query}${prediction}`)
continue
}
if(categorySplit.length > 1){
let wordSplit = this.stringToWordArray(categorySplit[1])
if(!wordSplit[0]){
wordSplit.shift()
}
let prediction = ''
let prefixMatch = String(categorySplit[1]).substring(0,1) != ' '
let prefixSpace = prefixMatch ? '' : ' '
if(wordSplit.length > 1){
prediction = `${prefixSpace}${wordSplit[0]} ${wordSplit[1]}`
}
else if (wordSplit.length == 1){
prediction = `${prefixSpace}${wordSplit[0]}`
}
else {
//bad result discard
continue
}
suggestions.push(`${query}${prediction}`)
continue
}
}
let dedupe = [...new Set(suggestions)]
let dedupeLimit = dedupe.length >= 10 ? 10 : dedupe.length
let arr = []
for(let x = 0; x < dedupeLimit; x++){
arr.push({
suggestion: dedupe[x]
})
}
return arr
get termCount() {
return 0 // Not applicable with Elasticsearch
}
}
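
With MiniSearch removed, Searcher is now a thin facade over the Elasticsearch service; a minimal usage sketch matching how server.js drives it (the settings values are illustrative):

import Searcher from './lib/search.js';

const search = new Searcher(['filename', 'category', 'type', 'region']);

const results = await search.findAllMatches('super mario', {
  fields: ['filename', 'category', 'type', 'region'],
  fuzzy: 1,           // passed through as Elasticsearch fuzziness
  combineWith: 'AND', // one must-clause per term; omit for a single should-clause
});
console.log(`${results.items.length} hits in ${results.elapsed}s`);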

lib/services/elasticsearch.js Normal file

@@ -0,0 +1,218 @@
import { Client } from '@elastic/elasticsearch';
import debugPrint from '../debugprint.js';
const client = new Client({
node: process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
});
const INDEX_NAME = 'myrient_files';
export async function initElasticsearch() {
try {
const indexExists = await client.indices.exists({ index: INDEX_NAME });
if (!indexExists) {
await client.indices.create({
index: INDEX_NAME,
body: {
settings: {
analysis: {
analyzer: {
filename_analyzer: {
type: 'custom',
tokenizer: 'standard',
filter: ['lowercase', 'word_delimiter_graph']
}
}
}
},
mappings: {
properties: {
filename: {
type: 'text',
analyzer: 'filename_analyzer'
},
category: {
type: 'text',
analyzer: 'standard',
fields: {
keyword: {
type: 'keyword'
}
}
},
type: {
type: 'text',
analyzer: 'standard'
},
region: {
type: 'text',
analyzer: 'standard'
},
path: { type: 'keyword' },
size: { type: 'keyword' },
date: { type: 'keyword' },
group: { type: 'keyword' }
}
}
}
});
console.log('Elasticsearch index created');
}
} catch (error) {
console.error('Elasticsearch init error:', error);
process.exit(1);
}
}
export async function indexFile(file) {
try {
await client.index({
index: INDEX_NAME,
id: file.id.toString(),
document: file
});
debugPrint(`Indexed file: ${file.filename}`);
} catch (error) {
console.error('Error indexing file:', error);
}
}
export async function bulkIndexFiles(files) {
const operations = files.flatMap(file => [
{ index: { _index: INDEX_NAME, _id: file.id.toString() } },
file
]);
try {
const { errors, items } = await client.bulk({
refresh: true,
operations
});
if (errors) {
console.error('Bulk indexing had errors');
items.forEach(item => {
if (item.index.error) {
console.error(item.index.error);
}
});
}
debugPrint(`Bulk indexed ${files.length} files`);
} catch (error) {
console.error('Bulk indexing error:', error);
}
}
export async function search(query, options) {
const searchQuery = {
index: INDEX_NAME,
body: {
size: 1000,
query: {
bool: {
must: buildMustClauses(query, options),
should: buildShouldClauses(query, options)
}
},
highlight: {
fields: {
filename: {},
category: {},
type: {},
region: {}
}
}
}
};
try {
const startTime = process.hrtime();
const response = await client.search(searchQuery);
const elapsed = parseHrtimeToSeconds(process.hrtime(startTime));
return {
items: response.hits.hits.map(hit => ({
...hit._source,
score: hit._score,
highlights: hit.highlight
})),
elapsed
};
} catch (error) {
console.error('Search error:', error);
return { items: [], elapsed: 0 };
}
}
function buildMustClauses(query, options) {
const clauses = [];
if (options.combineWith === 'AND') {
query.split(' ').forEach(term => {
clauses.push({
multi_match: {
query: term,
fields: options.fields.map(field =>
field === 'filename' ? `${field}^2` : field
),
fuzziness: options.fuzzy || 0,
type: 'best_fields'
}
});
});
}
return clauses;
}
function buildShouldClauses(query, options) {
const clauses = [];
if (options.combineWith !== 'AND') {
clauses.push({
multi_match: {
query,
fields: options.fields.map(field =>
field === 'filename' ? `${field}^2` : field
),
fuzziness: options.fuzzy || 0,
type: 'best_fields'
}
});
}
return clauses;
}
function parseHrtimeToSeconds(hrtime) {
return (hrtime[0] + (hrtime[1] / 1e9)).toFixed(3);
}
export async function getSuggestions(query, options) {
try {
const response = await client.search({
index: INDEX_NAME,
body: {
query: {
multi_match: {
query,
fields: ['filename^2', 'category'],
fuzziness: 'AUTO',
type: 'best_fields'
}
},
_source: ['filename', 'category'],
size: 10
}
});
return response.hits.hits.map(hit => ({
suggestion: hit._source.filename
}));
} catch (error) {
console.error('Suggestion error:', error);
return [];
}
}
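
To make the two query-building paths concrete: combineWith set to 'AND' splits the query into one multi_match per term (all terms must match), while anything else sends the whole query as a single should clause. For example, query 'mario kart' with fields ['filename', 'category'] and fuzzy 1 makes buildMustClauses return the following clause array (filename gets the ^2 boost from the field mapper):

[
  { multi_match: { query: 'mario', fields: ['filename^2', 'category'], fuzziness: 1, type: 'best_fields' } },
  { multi_match: { query: 'kart',  fields: ['filename^2', 'category'], fuzziness: 1, type: 'best_fields' } }
]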

package-lock.json generated

File diff suppressed because it is too large.

package.json

@@ -1,20 +1,24 @@
{
"dependencies": {
"@elastic/elasticsearch": "^8.12.2",
"compression": "^1.7.4",
"dotenv": "^16.4.5",
"ejs": "^3.1.10",
"express": "^4.21.1",
"figlet": "^1.7.0",
"file-older-than": "^1.0.0",
"innertext": "^1.0.3",
"jsdom": "^25.0.1",
"minisearch": "^7.1.0",
"jszip": "^3.10.1",
"node-cron": "^3.0.3",
"node-fetch": "^3.3.2",
"node-html-parser": "^6.1.13",
"pg": "^8.11.3",
"pg-hstore": "^2.3.4",
"piscina": "^4.7.0",
"sanitize": "^2.1.2",
"figlet": "^1.7.0",
"jszip": "^3.10.1"
"sequelize": "^6.37.1",
"sequelize-cli": "^6.6.2"
},
"type": "module"
}

server.js

@@ -2,7 +2,6 @@ import getAllFiles from "./lib/dircrawl.js";
import FileHandler from "./lib/filehandler.js";
import Searcher from "./lib/search.js";
import cron from "node-cron";
import FileOlderThan from "file-older-than";
import "dotenv/config";
import express from "express";
import http from "http";
@@ -12,9 +11,9 @@ import compression from "compression";
import { generateAsciiArt } from './lib/asciiart.js';
import { getEmulatorConfig, isEmulatorCompatible, isNonGameContent } from './lib/emulatorConfig.js';
import fetch from 'node-fetch';
import { initDB, File, QueryCount } from './lib/database.js';
import { initElasticsearch } from './lib/services/elasticsearch.js';
let fileListPath = "./data/filelist.json";
let queryCountFile = "./data/queries.txt";
let categoryListPath = "./lib/categories.json"
let searchAlikesPath = './lib/searchalikes.json'
let nonGameTermsPath = './lib/nonGameTerms.json'
@@ -25,12 +24,15 @@ let crawlTime = 0;
let queryCount = 0;
let fileCount = 0;
let indexPage = "pages/index";
if (FileHandler.fileExists(fileListPath)) {
crawlTime = await FileHandler.fileTime(fileListPath);
}
if (FileHandler.fileExists(queryCountFile)) {
queryCount = parseInt(await FileHandler.readFile(queryCountFile));
}
// Initialize databases
await initDB();
await initElasticsearch();
// Get initial counts
fileCount = await File.count();
crawlTime = (await File.max('updatedAt'))?.getTime() || 0;
queryCount = (await QueryCount.findOne())?.count || 0;
let searchFields = ["filename", "category", "type", "region"];
@@ -52,29 +54,16 @@ for (let field in searchFields) {
}
}
let fileList = [];
let search; //cheat so we can check before assignment
let search = new Searcher(searchFields);
async function getFilesJob() {
console.log("Updating the file list.");
fileList = await getAllFiles(categoryList);
if(!fileList){
if(typeof search == "undefined"){
//fall back to loading the list if it exists
await loadFileList()
}
return
fileCount = await getAllFiles(categoryList);
if(!fileCount) {
console.log("File update failed");
return;
}
await FileHandler.saveJsonFile(fileListPath, fileList);
fileCount = fileList.length;
if (typeof search == "undefined") {
search = new Searcher(searchFields);
await search.createIndex(fileList)
} else {
await search.updateIndex(fileList);
}
fileList = [];
crawlTime = await FileHandler.fileTime(fileListPath);
crawlTime = Date.now();
console.log(`Finished updating file list. ${fileCount} found.`);
}
@@ -82,38 +71,19 @@ function buildOptions(page, options) {
return { page: page, ...options, ...defaultOptions };
}
async function loadFileList(){
fileList = await FileHandler.parseJsonFile(fileListPath);
fileCount = fileList.length;
search = new Searcher(searchFields, searchAlikes.StringGroups);
await search.createIndex(fileList)
fileList = [];
}
if (
process.env.FORCE_FILE_REBUILD == "1" ||
!FileHandler.fileExists(fileListPath) ||
FileOlderThan(fileListPath, "1w")
) {
await getFilesJob();
} else {
await loadFileList()
}
let defaultOptions = {
crawlTime: crawlTime,
queryCount: queryCount,
fileCount: fileCount,
termCount: search.miniSearch.termCount,
termCount: 0,
generateAsciiArt: generateAsciiArt,
isEmulatorCompatible: isEmulatorCompatible
};
function updateDefaults(){
defaultOptions.crawlTime = crawlTime
defaultOptions.queryCount = queryCount
defaultOptions.fileCount = fileCount
defaultOptions.termCount = search.miniSearch.termCount
defaultOptions.crawlTime = crawlTime;
defaultOptions.queryCount = queryCount;
defaultOptions.fileCount = fileCount;
}
let app = express();
@@ -154,13 +124,16 @@ app.get("/search", async function (req, res) {
}
}
if (settings.combineWith != "AND") {
delete settings.combineWith; //remove if unset to avoid crashing
delete settings.combineWith;
}
let results = await search.findAllMatches(query, settings);
debugPrint(results);
if(results.items.length && pageNum == 1){
queryCount += 1;
FileHandler.writeFile(queryCountFile, String(queryCount));
await QueryCount.update(
{ count: queryCount },
{ where: { id: 1 } }
);
updateDefaults()
}
let options = {
@@ -173,26 +146,31 @@ app.get("/search", async function (req, res) {
let page = "results";
options = buildOptions(page, options);
res.render(indexPage, options);
});
app.get("/lucky", async function (req, res) {
let results = [];
if (req.query.q) {
let settings = req.query.s ? JSON.parse(req.query.s) : defaultSettings;
let settings = req.query.s ? JSON.parse(atob(req.query.s)) : defaultSettings;
results = await search.findAllMatches(req.query.q, settings);
debugPrint(results);
}
if (results.length) {
if (results.items.length) {
res.redirect(results.items[0].path);
} else {
const magicNum = Math.floor(Math.random() * search.getIndexSize());
const luckyPath = search.findIndex(magicNum).path;
debugPrint(`${magicNum}: ${luckyPath}`);
res.redirect(luckyPath);
const count = await File.count();
const randomId = Math.floor(Math.random() * count);
const luckyFile = await File.findOne({
offset: randomId
});
debugPrint(`${randomId}: ${luckyFile?.path}`);
res.redirect(luckyFile?.path || '/');
}
queryCount += 1;
FileHandler.writeFile(queryCountFile, String(queryCount));
await QueryCount.update(
{ count: queryCount },
{ where: { id: 1 } }
);
updateDefaults()
});
@@ -229,7 +207,7 @@ app.get("/play/:id", async function (req, res) {
}
let fileId = parseInt(req.params.id);
let romFile = search.findIndex(fileId);
let romFile = await search.findIndex(fileId);
if (!romFile) {
res.redirect('/');
@@ -255,7 +233,7 @@ app.get("/proxy-rom/:id", async function (req, res) {
}
let fileId = parseInt(req.params.id);
let romFile = search.findIndex(fileId);
let romFile = await search.findIndex(fileId);
if (!romFile) {
res.status(404).send('ROM not found');
@@ -322,4 +300,13 @@ server.on("listening", function () {
});
console.log(`Loaded ${fileCount} known files.`);
// Run file update job if needed
if (
process.env.FORCE_FILE_REBUILD == "1" ||
!fileCount ||
(crawlTime && Date.now() - crawlTime > 7 * 24 * 60 * 60 * 1000) // 1 week
) {
await getFilesJob();
}
cron.schedule("0 30 2 * * *", getFilesJob);
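
(The schedule string "0 30 2 * * *" uses node-cron's six-field syntax with a leading seconds field, so the crawl re-runs daily at 02:30:00.)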

(results table view template; file name not shown)

@@ -144,7 +144,7 @@
</div>
<script defer>
resultTable = new DataTable('#results', {
"order": [[6, 'desc']],
"order": [[7, 'desc']],
"columns": [
{ "data": "name" }, // Name
{ "data": "group" }, // Group