From 8b23384840b5eb18b3e35a9dc4596ad456a03d8e Mon Sep 17 00:00:00 2001 From: Luis Nassif Date: Sun, 28 Apr 2024 16:35:29 -0300 Subject: [PATCH] '#1823: redirect harmless console messages to log --- .../resources/scripts/tasks/WhisperProcess.py | 4 +- .../transcript/Wav2Vec2TranscriptTask.java | 2 +- .../transcript/WhisperTranscriptTask.java | 40 +++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/iped-app/resources/scripts/tasks/WhisperProcess.py b/iped-app/resources/scripts/tasks/WhisperProcess.py index f0f68c5ffe..8e2b6456b1 100644 --- a/iped-app/resources/scripts/tasks/WhisperProcess.py +++ b/iped-app/resources/scripts/tasks/WhisperProcess.py @@ -37,7 +37,7 @@ def main(): deviceNum = 0 try: - model = whisperx.load_model(modelName, device=deviceId, device_index=deviceNum, threads=threads, compute_type=compute_type) + model = whisperx.load_model(modelName, device=deviceId, device_index=deviceNum, threads=threads, compute_type=compute_type, language=language) except Exception as e: if deviceId != 'cpu': @@ -45,7 +45,7 @@ def main(): print('FAILED to load model on GPU, fallbacking to CPU!', file=sys.stderr) deviceId = 'cpu' compute_type = 'int8' - model = whisperx.load_model(modelName, device=deviceId, device_index=deviceNum, threads=threads, compute_type=compute_type) + model = whisperx.load_model(modelName, device=deviceId, device_index=deviceNum, threads=threads, compute_type=compute_type, language=language) else: raise e diff --git a/iped-engine/src/main/java/iped/engine/task/transcript/Wav2Vec2TranscriptTask.java b/iped-engine/src/main/java/iped/engine/task/transcript/Wav2Vec2TranscriptTask.java index 8cd05ae366..84a92dca8a 100644 --- a/iped-engine/src/main/java/iped/engine/task/transcript/Wav2Vec2TranscriptTask.java +++ b/iped-engine/src/main/java/iped/engine/task/transcript/Wav2Vec2TranscriptTask.java @@ -40,7 +40,7 @@ public class Wav2Vec2TranscriptTask extends AbstractTranscriptTask { private static LinkedBlockingDeque deque = 
new LinkedBlockingDeque<>(); - private static volatile Level logLevel = Level.forName("MSG", 250); + protected static volatile Level logLevel = Level.forName("MSG", 250); static class Server { Process process; diff --git a/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java b/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java index d4d05744d0..6a77455cca 100644 --- a/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java +++ b/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java @@ -3,7 +3,10 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; +import java.util.Arrays; +import java.util.List; import org.apache.commons.lang3.SystemUtils; import org.apache.logging.log4j.LogManager; @@ -96,4 +99,41 @@ protected TextAndScore transcribeAudio(File tmpFile) throws Exception { return transcribeWavPart(tmpFile); } + @Override + protected void logInputStream(InputStream is) { + List ignoreMsgs = Arrays.asList( + "With dispatcher enabled, this function is no-op. You can remove the function call.", + "torchvision is not available - cannot save figures", + "Lightning automatically upgraded your loaded checkpoint from", + "Model was trained with pyannote.audio 0.0.1, yours is", + "Model was trained with torch 1.10.0+cu102, yours is"); + Thread t = new Thread() { + public void run() { + byte[] buf = new byte[1024]; + int read = 0; + try { + while ((read = is.read(buf)) != -1) { + String msg = new String(buf, 0, read).trim(); + boolean ignore = false; + for (String i : ignoreMsgs) { + if (msg.contains(i)) { + ignore = true; + break; + } + } + if (ignore) { + logger.warn(msg); + } else { + logger.log(logLevel, msg); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + } + }; + t.setDaemon(true); + t.start(); + } + }