introduce term processing, word associations and hidden category matching

This commit is contained in:
Alexandra
2024-10-25 03:56:56 -06:00
parent 40f12486b7
commit 772f7dd846
5 changed files with 50 additions and 46 deletions

View File

@@ -404,5 +404,8 @@
"World",
"UK",
"Asia"
]
],
"Special": {
"PlayStation": "PlayStation 1"
}
}

View File

@@ -50,7 +50,7 @@ export async function parseOutFile(data) {
path: data.url + path,
size: size,
category: category,
hidden: `${category.replaceAll(' ')} ${cats.subCat.replaceAll(' ')}`,
hidden: `${category.replaceAll(' ', '')} ${cats.subCat.replaceAll(' ', '')}`,
type: findType(fullName, data.catList),
date: innertext(file.querySelector(".date").innerHTML).trim(),
region: findRegion(fullName, data.catList),
@@ -96,9 +96,20 @@ function findCategory(str, catList) {
foundCat = "Others";
}
}
//special fix ups
for(let cat in catList.Special){
let specialString = catList.Special[cat]
if(foundCat == cat){
foundCat = specialString
}
if(foundSubCat == cat){
foundSubCat = specialString
}
}
if (foundSubCat.includes(foundCat)) {
foundCat = "";
}
return {
cat: foundCat,
subCat: foundSubCat

View File

@@ -13,37 +13,26 @@ export default class Searcher{
/**
 * Expands a search term into a de-duplicated list of related strings.
 *
 * The term is lower-cased, seeded into the result list, and extended with
 * the pieces returned by stringBreakout. Alias groups are then scanned and
 * the broken-out strings of a matching group are appended.
 *
 * NOTE(review): this span appears to interleave the removed and the added
 * implementation from the diff (the `this.stringGroups` variant and the
 * `searchAlikes.StringAssoc` variant). Braces do not balance as shown, so
 * confirm against the real file before relying on these comments.
 *
 * @param {string} term - raw term; `toLowerCase()` is called on it directly
 * @returns {string[]} unique strings collected for the term
 */
termProcessor(term){
term = term.toLowerCase()
let stringArray = [term]
let workingArray = []
stringArray.push(...this.stringBreakout(term))
// for...in iterates the keys of the groups collection; each group is read by key
for(let group in this.stringGroups){
let currentGroup = this.stringGroups[group]
// rebuild the broken-out strings for this group from scratch
workingArray = []
for(let index in currentGroup){
let currentString = currentGroup[index]
workingArray.push(...this.stringBreakout(currentString))
}
for(let index in workingArray){
// NOTE(review): `matched` is re-declared as false on every iteration, so the
// guard directly below can never be true and never skips anything
let matched = false
if(matched){
//skip the rest of the loop if we have a match
continue
}
let currentString = workingArray[index]
// the term equals one of the group's broken-out strings: add the whole group
if(term == currentString){
stringArray.push(...workingArray)
matched = true
// stringBreakout is reached through the class name here rather than `this`
stringArray.push(...Searcher.stringBreakout(term))
// `searchAlikes` is not declared in this span — presumably a global set elsewhere; verify
for(let group in searchAlikes.StringAssoc){
let currentGroup = searchAlikes.StringAssoc[group]
// only the first entry of a group is compared against the term
let leadString = currentGroup[0]
if(term == leadString){
// on a lead-string match, add the broken-out form of every string in the group
for(let index in currentGroup){
let currentString = currentGroup[index]
stringArray.push(...Searcher.stringBreakout(currentString))
}
}
}
// round-trip through a Set to drop duplicate strings
return [...new Set(stringArray)]
}
stringBreakout(string){
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*|/g
static stringBreakout(string){
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*/g
let array = [string]
let workingString = ''
array.push(string.replaceAll(symbolRegex, ''))
array.push(string.split(' '))
array.push(...string.split(' '))
workingString = string.replaceAll(symbolRegex, ' ')
array.push(...workingString.split(' '))
return [...new Set(array)]
@@ -71,8 +60,8 @@ export default class Searcher{
this.miniSearch = new MiniSearch({
fields: [...this.fields, 'hidden'],
storeFields: ['filename', 'category', 'type', 'date', 'size', 'region', 'path', 'id'],
processTerm: this.termProcessor
})
}
else{
this.miniSearch.removeAll()

View File

@@ -1,22 +1,23 @@
{
"StringGroups": [
["snes", "super nintendo entertainment system", "super famicom", "super family computer"],
["nes", "nintendo entertainment system", "famicom", "family computer"],
["ps1", "playstation 1", "playstation one", "play station"],
["ps2", "playstation 2", "play station"],
["ps3", "playstation 3", "play station"],
["psm", "playstation mobile"],
["x360", "xbox360", "xbox 360", "microsoft"],
["xb", "xbox", "microsoft"],
["fds", "family computer disk system", "famicom disk system"],
["gbc", "game boy advance", "gameboy"],
["gba", "game boy color", "gameboy"],
["gc", "gamecube", "dolphin"],
["md", "megadrive", "genesis"],
["dc", "dreamcast", "dream cast"],
["psv", "playstationvita", "playstation vita"],
["wii", "revolution"],
["psn", "playstation", "network"],
"StringAssoc": [
["supernintendoentertainmentsystem", "snes", "super famicom", "super family computer"],
["nintendoentertainmentsystem", "nes", "famicom", "family computer"],
["playstation", "ps", "play station"],
["playstation1", "ps1", "playstation 1", "psone", "one"],
["playstation2", "ps2", "playstation 2"],
["playstation3", "ps3", "playstation 3"],
["playstationmobile", "psm", "mobile"],
["xbox360", "x360", "xbox"],
["xbox", "xb"],
["famicomdisksystem", "fds", "famicom disk system"],
["gameboyadvance", "gba", "gameboy advance"],
["gameboycolor", "gbc", "gameboy color"],
["gameboy", "gb", "game boy"],
["gamecube", "gc", "game cube", "dolphin"],
["megadrive", "md", "megadrive", "mega drive"],
["dreamcast", "dc", "dream cast"],
["playstationvita", "psv", "playstation vita"],
["playstationnetwork", "psn", "playstation network"],
["uk", "united kingdom"],
["usa", "united states of america"],
["vmu", "visual memory unit"],
@@ -30,6 +31,7 @@
["littleendian", "little endian"],
["pc88", "pc-88", "pc 88"],
["dvd", "digital video disc", "digital versatile disc"],
["ms", "master system"]
["mastersystem", "ms", "master system"],
["wii", "revolution"]
]
}

View File

@@ -9,14 +9,13 @@ import http from "http";
import sanitize from "sanitize";
import debugPrint from "./lib/debugprint.js";
import compression from "compression";
import { url } from "inspector";
let fileListPath = "./filelist.json";
let queryCountFile = "./queries.txt";
let categoryListPath = "./lib/categories.json"
let searchAlikesPath = './lib/searchalikes.json'
let categoryList = await FileHandler.parseJsonFile(categoryListPath);
let searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath)
global.searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath)
let crawlTime = 0;
let queryCount = 0;
let fileCount = 0;