import { expandGlob } from 'std/fs/mod.ts';
import { config } from "std/dotenv/mod.ts";
import { chunk } from "https://deno.land/std/collections/chunk.ts";
import { Client as ElasticsearchClient, DocumentsBulkRequest } from "https://deno.land/x/elasticsearch@v8.3.3/mod.ts";
import { Doc, DocParser, readDoc } from './doc_load/load.ts';
import ProgressBar from 'https://deno.land/x/progress@v1.3.0/mod.ts';
import { Command } from "cliffy";

/** Name of the Elasticsearch index every command in this script operates on. */
const INDEX_NAME = 'github-awesome';

// Load variables from the dotenv file; `export: true` also exports them to the
// process environment.
const env = await config({ export: true });

const client = new ElasticsearchClient({
  node: 'https://localhost:9200',
  auth: {
    username: env['ELASTIC_USERNAME'],
    password: env['ELASTIC_PASSWORD'],
  },
});

/** Shape of the bulk response fields that this script reads. */
type BulkResponse = { errors: boolean; took: number; items: unknown[] };

/** Action/document pair sent to the bulk endpoint for one document. */
type BulkIndexPair = [{ index: { _id: string } }, Doc];

/**
 * Creates the index with its field mappings and logs the raw response
 * followed by a one-line success/failure summary.
 */
async function createIndex() {
  const res = await client.indices.create({
    index: INDEX_NAME,
    body: {
      mappings: {
        properties: {
          name: { type: "text" },
          desc: { type: "text" },
          url: { type: "keyword" },
          star: { type: "integer" },
          fork: { type: "integer" },
          author: { type: "keyword" },
          tags: { type: "keyword", ignore_above: 256 },
          readme: { type: "text" },
        },
      },
    },
  });
  console.log(res);
  console.log(res.acknowledged ? 'Index created' : 'Index creation failed');
}

/**
 * Deletes the index and logs the raw response followed by a one-line
 * success/failure summary.
 */
async function deleteIndex() {
  const res = await client.indices.delete({
    index: INDEX_NAME,
  });
  console.log(res);
  console.log(res.acknowledged ? 'Index deleted' : 'Index deletion failed');
}

/**
 * Reads the documents at the given paths and sends them to the index through
 * the bulk API, one request per chunk of paths.
 *
 * @param path - Paths of the document files to read with `readDoc`.
 * @param options - `chunkSize` is the number of documents per bulk request
 *   (default 1000); `progressBar` renders a progress bar, advanced once per
 *   chunk (default false).
 */
async function bulkIndex(
  path: string[],
  { chunkSize = 1000, progressBar = false }: { chunkSize?: number; progressBar?: boolean } = {},
) {
  const chunks = chunk(path, chunkSize);
  // Only build the progress bar when the caller asked for one.
  const bar = progressBar
    ? new ProgressBar({
      total: chunks.length,
      title: 'Indexing',
      width: 50,
    })
    : undefined;

  let completed = 0;
  for (const pathes of chunks) {
    const docs = await Promise.all(pathes.map(async (docPath): Promise<BulkIndexPair> => {
      const doc = await readDoc(docPath);
      // `from_url` is dropped so it is not sent to the index.
      if (doc.from_url) {
        delete doc.from_url;
      }
      return [
        { index: { _id: doc.author + "/" + doc.name } },
        doc,
      ];
    }));

    // The bulk body is a flat sequence: action line, document, action line, ...
    const res = await client.documents.bulk({
      target: INDEX_NAME,
      body: docs.flat(),
    } as DocumentsBulkRequest) as BulkResponse;

    if (res.errors) {
      // Route the message through the bar when it is active so the output
      // does not interleave with the rendered bar.
      if (bar) {
        bar.console("error occurs!");
      } else {
        console.log("error occurs!");
      }
    }
    bar?.render(++completed);
  }
  bar?.end();
}

/**
 * Runs a `multi_match` query against the index over the name, desc, tags,
 * author and readme fields.
 *
 * @param query - Text to search for.
 * @param options - `size` is the maximum number of hits (default 10);
 *   `from` is the offset of the first hit (default 0).
 * @returns The `hits.hits` array of the search response.
 */
async function test_search(
  query: string,
  { size = 10, from = 0 }: { size?: number; from?: number } = {},
) {
  const res = await client.search({
    target: INDEX_NAME,
    body: {
      query: {
        multi_match: {
          query,
          fields: ['name', 'desc', 'tags', 'author', 'readme'],
        },
      },
      from,
      size,
    },
  });
  return res.hits.hits;
}

/**
 * Builds the command-line interface (index, search, create-index,
 * delete-index) and dispatches on `Deno.args`.
 */
async function main() {
  const cmd = new Command();
  cmd
    .name('github-awesome')
    .version('0.1.0')
    .description('github-awesome search engine cli');

  cmd
    .command('index [path...]')
    .description('index github-awesome. glob pattern is supported.')
    // The value placeholder is required; without it the option is a bare flag.
    .option('-c, --chunk-size <chunkSize:number>', 'chunk size', {
      default: 200,
    })
    .option('-p, --progress-bar', 'show progress bar')
    .action(async ({ chunkSize, progressBar }, ...path: string[]) => {
      const pathes: string[] = [];
      for (const pattern of path) {
        for await (const entry of expandGlob(pattern)) {
          pathes.push(entry.path);
        }
      }
      if (pathes.length === 0) {
        console.log('no path found');
        return;
      }
      await bulkIndex(pathes, {
        chunkSize,
        progressBar,
      });
    });

  cmd
    // The query must be declared as an argument or the action never receives it.
    .command('search <query:string>')
    .description('search github-awesome')
    .option('-s, --size <size:number>', 'size', {
      default: 10,
    })
    .option('-f, --from <from:number>', 'from', {
      default: 0,
    })
    .option('-j, --json', 'output json')
    .action(async ({ size, from, json }, query: string) => {
      const hits = await test_search(query, {
        size,
        from,
      });
      if (hits.length === 0) {
        console.log('no result found');
        return;
      }
      if (json) {
        console.log(JSON.stringify(hits, null, 2));
      } else {
        for (const doc of hits) {
          console.log("id :", doc._id);
          console.log("score :", doc._score);
          console.log();
        }
      }
    });

  cmd
    .command('create-index')
    .description('create index')
    .action(async () => {
      await createIndex();
    });

  cmd
    .command('delete-index')
    .description('delete index')
    .action(async () => {
      await deleteIndex();
    });

  await cmd.parse(Deno.args);
}

if (import.meta.main) {
  await main();
}