mirror of
https://github.com/alexankitty/Myrient-Search-Engine.git
synced 2026-01-15 16:33:15 -03:00
introduce term processing, word associations and hidden category matching
This commit is contained in:
@@ -404,5 +404,8 @@
|
||||
"World",
|
||||
"UK",
|
||||
"Asia"
|
||||
]
|
||||
],
|
||||
"Special": {
|
||||
"PlayStation": "PlayStation 1"
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,7 @@ export async function parseOutFile(data) {
|
||||
path: data.url + path,
|
||||
size: size,
|
||||
category: category,
|
||||
hidden: `${category.replaceAll(' ')} ${cats.subCat.replaceAll(' ')}`,
|
||||
hidden: `${category.replaceAll(' ', '')} ${cats.subCat.replaceAll(' ', '')}`,
|
||||
type: findType(fullName, data.catList),
|
||||
date: innertext(file.querySelector(".date").innerHTML).trim(),
|
||||
region: findRegion(fullName, data.catList),
|
||||
@@ -96,9 +96,20 @@ function findCategory(str, catList) {
|
||||
foundCat = "Others";
|
||||
}
|
||||
}
|
||||
//special fix ups
|
||||
for(let cat in catList.Special){
|
||||
let specialString = catList.Special[cat]
|
||||
if(foundCat == cat){
|
||||
foundCat = specialString
|
||||
}
|
||||
if(foundSubCat == cat){
|
||||
foundSubCat = specialString
|
||||
}
|
||||
}
|
||||
if (foundSubCat.includes(foundCat)) {
|
||||
foundCat = "";
|
||||
}
|
||||
|
||||
return {
|
||||
cat: foundCat,
|
||||
subCat: foundSubCat
|
||||
|
||||
@@ -13,37 +13,26 @@ export default class Searcher{
|
||||
termProcessor(term){
|
||||
term = term.toLowerCase()
|
||||
let stringArray = [term]
|
||||
let workingArray = []
|
||||
stringArray.push(...this.stringBreakout(term))
|
||||
for(let group in this.stringGroups){
|
||||
let currentGroup = this.stringGroups[group]
|
||||
workingArray = []
|
||||
for(let index in currentGroup){
|
||||
let currentString = currentGroup[index]
|
||||
workingArray.push(...this.stringBreakout(currentString))
|
||||
}
|
||||
for(let index in workingArray){
|
||||
let matched = false
|
||||
if(matched){
|
||||
//skip the rest of the loop if we have a match
|
||||
continue
|
||||
}
|
||||
let currentString = workingArray[index]
|
||||
if(term == currentString){
|
||||
stringArray.push(...workingArray)
|
||||
matched = true
|
||||
stringArray.push(...Searcher.stringBreakout(term))
|
||||
for(let group in searchAlikes.StringAssoc){
|
||||
let currentGroup = searchAlikes.StringAssoc[group]
|
||||
let leadString = currentGroup[0]
|
||||
if(term == leadString){
|
||||
for(let index in currentGroup){
|
||||
let currentString = currentGroup[index]
|
||||
stringArray.push(...Searcher.stringBreakout(currentString))
|
||||
}
|
||||
}
|
||||
}
|
||||
return [...new Set(stringArray)]
|
||||
}
|
||||
|
||||
stringBreakout(string){
|
||||
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*|/g
|
||||
static stringBreakout(string){
|
||||
let symbolRegex = /-|_|\+|=|\)|\(|\[|{|}|]|;|:|"|'|<|>|\.|,|\/|\?|\||\\|!|@|#|\$|%|\^|&|\*/g
|
||||
let array = [string]
|
||||
let workingString = ''
|
||||
array.push(string.replaceAll(symbolRegex, ''))
|
||||
array.push(string.split(' '))
|
||||
array.push(...string.split(' '))
|
||||
workingString = string.replaceAll(symbolRegex, ' ')
|
||||
array.push(...workingString.split(' '))
|
||||
return [...new Set(array)]
|
||||
@@ -71,8 +60,8 @@ export default class Searcher{
|
||||
this.miniSearch = new MiniSearch({
|
||||
fields: [...this.fields, 'hidden'],
|
||||
storeFields: ['filename', 'category', 'type', 'date', 'size', 'region', 'path', 'id'],
|
||||
processTerm: this.termProcessor
|
||||
})
|
||||
|
||||
}
|
||||
else{
|
||||
this.miniSearch.removeAll()
|
||||
|
||||
@@ -1,22 +1,23 @@
|
||||
{
|
||||
"StringGroups": [
|
||||
["snes", "super nintendo entertainment system", "super famicom", "super family computer"],
|
||||
["nes", "nintendo entertainment system", "famicom", "family computer"],
|
||||
["ps1", "playstation 1", "playstation one", "play station"],
|
||||
["ps2", "playstation 2", "play station"],
|
||||
["ps3", "playstation 3", "play station"],
|
||||
["psm", "playstation mobile"],
|
||||
["x360", "xbox360", "xbox 360", "microsoft"],
|
||||
["xb", "xbox", "microsoft"],
|
||||
["fds", "family computer disk system", "famicom disk system"],
|
||||
["gbc", "game boy advance", "gameboy"],
|
||||
["gba", "game boy color", "gameboy"],
|
||||
["gc", "gamecube", "dolphin"],
|
||||
["md", "megadrive", "genesis"],
|
||||
["dc", "dreamcast", "dream cast"],
|
||||
["psv", "playstationvita", "playstation vita"],
|
||||
["wii", "revolution"],
|
||||
["psn", "playstation", "network"],
|
||||
"StringAssoc": [
|
||||
["supernintendoentertainmentsystem", "snes", "super famicom", "family computer"],
|
||||
["nintendoentertainmentsystem", "nes", "famicom", "family computer"],
|
||||
["playstation", "ps", "play station"],
|
||||
["playstation1", "ps1", "playstation 1", "psone", "one"],
|
||||
["playstation2", "ps2", "playstation 2"],
|
||||
["playstation3", "ps3", "playstation 3"],
|
||||
["playstationmobile", "psm", "mobile"],
|
||||
["xbox360", "x360", "xbox"],
|
||||
["xbox", "xb"],
|
||||
["famicomdisksystem", "fds", "famicom disk system"],
|
||||
["gameboyadvance", "gba", "gameboy advance"],
|
||||
["gameboycolor", "gbc", "gameboy color"],
|
||||
["gameboy", "gb", "gameboy"],
|
||||
["gamecube", "gc", "game cube", "dolphin"],
|
||||
["megadrive", "md", "megadrive", "mega drive"],
|
||||
["dreamcast", "dc", "dream cast"],
|
||||
["playstationvita", "psv", "playstation vita"],
|
||||
["playstationnetwork", "psn", "playstation network"],
|
||||
["uk", "united kingdom"],
|
||||
["usa", "united states of america"],
|
||||
["vmu", "visual memory unit"],
|
||||
@@ -30,6 +31,7 @@
|
||||
["littleendian", "little endian"],
|
||||
["pc88", "pc-88", "pc 88"],
|
||||
["dvd", "digital video disc", "digital versatile disc"],
|
||||
["ms", "master system"]
|
||||
["mastersystem", "ms", "master system"],
|
||||
["wii", "revolution"]
|
||||
]
|
||||
}
|
||||
@@ -9,14 +9,13 @@ import http from "http";
|
||||
import sanitize from "sanitize";
|
||||
import debugPrint from "./lib/debugprint.js";
|
||||
import compression from "compression";
|
||||
import { url } from "inspector";
|
||||
|
||||
let fileListPath = "./filelist.json";
|
||||
let queryCountFile = "./queries.txt";
|
||||
let categoryListPath = "./lib/categories.json"
|
||||
let searchAlikesPath = './lib/searchalikes.json'
|
||||
let categoryList = await FileHandler.parseJsonFile(categoryListPath);
|
||||
let searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath)
|
||||
global.searchAlikes = await FileHandler.parseJsonFile(searchAlikesPath)
|
||||
let crawlTime = 0;
|
||||
let queryCount = 0;
|
||||
let fileCount = 0;
|
||||
|
||||
Reference in New Issue
Block a user