Parallel Searching

This commit is contained in:
Christoph Stahl 2022-11-29 22:54:14 +01:00
parent 593ee0caa6
commit d12d67c4e6
5 changed files with 61 additions and 50 deletions

View file

@@ -102,6 +102,8 @@ async def preview(entry: Entry) -> None:
"--sub-file=-",
"--fullscreen",
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
await process.communicate(subtitle.encode())

View file

@@ -415,16 +415,22 @@ async def handle_search(sid: str, data: dict[str, str]) -> None:
query = data["query"]
result_futures = []
for source in state.config.sources_prio:
loop = asyncio.get_running_loop()
search_future = loop.create_future()
loop.create_task(state.config.sources[source].search(search_future, query))
result_futures.append(search_future)
results_list = await asyncio.gather(
*[
state.config.sources[source].search(query)
for source in state.config.sources_prio
]
)
# for source in state.config.sources_prio:
# loop = asyncio.get_running_loop()
# search_future = loop.create_future()
# loop.create_task(state.config.sources[source].search(search_future, query))
# result_futures.append(search_future)
results = [
search_result
for result_future in result_futures
for search_result in await result_future
for source_result in results_list
for search_result in source_result
]
await sio.emit(
"search-results",

View file

@@ -1,5 +1,4 @@
from json import load, dump
from time import sleep, perf_counter
# from json import load, dump
from itertools import zip_longest
import asyncio
import os
@@ -49,15 +48,16 @@ class S3Source(Source):
async def get_config(self) -> dict[str, Any] | list[dict[str, Any]]:
def _get_config() -> dict[str, Any] | list[dict[str, Any]]:
if not self.index:
print(f"Indexing {self.bucket}")
# self.index = [
# obj.object_name
# for obj in self.minio.list_objects(self.bucket, recursive=True)
# ]
print(f"s3: Indexing '{self.bucket}'")
self.index = [
obj.object_name
for obj in self.minio.list_objects(self.bucket, recursive=True)
]
print("s3: Indexing done")
# with open("s3_files", "w") as f:
# dump(self.index, f)
with open("s3_files", "r") as f:
self.index = [item for item in load(f) if item.endswith(".cdg")]
# with open("s3_files", "r") as f:
# self.index = [item for item in load(f) if item.endswith(".cdg")]
chunked = zip_longest(*[iter(self.index)] * 1000, fillvalue="")
return [
@@ -69,10 +69,7 @@ class S3Source(Source):
def add_to_config(self, config: dict[str, Any]) -> None:
self.index += config["index"]
async def search(
self, result_future: asyncio.Future[list[Result]], query: str
) -> None:
print("searching s3")
async def search(self, query: str) -> list[Result]:
filtered: list[str] = self.filter_data_by_query(query, self.index)
results: list[Result] = []
for filename in filtered:
@@ -80,7 +77,7 @@ class S3Source(Source):
if result is None:
continue
results.append(result)
result_future.set_result(results)
return results
async def get_missing_metadata(self, entry: Entry) -> dict[str, Any]:
def mutagen_wrapped(file: str) -> int:
@@ -109,12 +106,12 @@ class S3Source(Source):
target_file_mp3: str = target_file_cdg[:-3] + "mp3"
os.makedirs(os.path.dirname(target_file_cdg), exist_ok=True)
video_task: asyncio.Task[None] = asyncio.create_task(
video_task: asyncio.Task[Any] = asyncio.create_task(
asyncio.to_thread(
self.minio.fget_object, self.bucket, entry.id, target_file_cdg
)
)
audio_task: asyncio.Task[None] = asyncio.create_task(
audio_task: asyncio.Task[Any] = asyncio.create_task(
asyncio.to_thread(
self.minio.fget_object, self.bucket, ident_mp3, target_file_mp3
)

View file

@@ -42,15 +42,17 @@ class Source:
[f"--audio-file={audio}"] if audio else []
)
mpv_process = asyncio.create_subprocess_exec("mpv", *args)
mpv_process = asyncio.create_subprocess_exec(
"mpv",
*args,
stdout=asyncio.subprocess.PIPE,
)
return await mpv_process
async def get_entry(self, performer: str, ident: str) -> Entry:
raise NotImplementedError
async def search(
self, result_future: asyncio.Future[list[Result]], query: str
) -> None:
async def search(self, query: str) -> list[Result]:
raise NotImplementedError
async def doBuffer(self, entry: Entry) -> Tuple[str, Optional[str]]:

View file

@@ -63,21 +63,22 @@ class YoutubeSource(Source):
return 1 - (hits / len(queries))
async def search(
self, result_future: asyncio.Future[list[Result]], query: str
) -> None:
def _search(result_future: asyncio.Future[list[Result]], query: str) -> None:
async def search(self, query: str) -> list[Result]:
results: list[YouTube] = []
for channel in self.channels:
results += self._channel_search(query, channel)
search_results: Optional[list[YouTube]] = Search(query + " karaoke").results
if search_results is not None:
results += search_results
results_lists: list[list[YouTube]] = await asyncio.gather(
*[
asyncio.to_thread(self._channel_search, query, channel)
for channel in self.channels
],
asyncio.to_thread(self._yt_search, query),
)
results = [
search_result for yt_result in results_lists for search_result in yt_result
]
results.sort(key=partial(self._contains_index, query))
result_future.set_result(
[
return [
Result(
id=result.watch_url,
source="youtube",
@@ -87,9 +88,12 @@ class YoutubeSource(Source):
)
for result in results
]
)
await asyncio.to_thread(_search, result_future, query)
def _yt_search(self, query: str) -> list[YouTube]:
results = Search(f"{query} karaoke").results
if results is not None:
return results
return []
def _channel_search(self, query: str, channel: str) -> list[YouTube]:
browse_id: str = Channel(f"https://www.youtube.com{channel}").channel_id