fix(rtrvc): parameter issue (pass f0 method by keyword, convert tensors to NumPy for pm/harvest/dio, pass flow_head to enc_p)

fumiama · fumiama · commit 3a79d81907e9 · 2024-06-16T17:25:01.000+09:00
diff --git a/infer/lib/rtrvc.py b/infer/lib/rtrvc.py
@@ -213,8 +213,7 @@ def infer(
                 pitch, pitchf = self._get_f0(
                     input_wav[-f0_extractor_frame:],
                     self.f0_up_key - self.formant_shift,
-                    3,
-                    f0method,
+                    method=f0method,
                 )
             shift = block_frame_16k // self.window
             self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
@@ -297,7 +296,7 @@ def _get_f0_post(self, f0, f0_up_key):
     def _get_f0_pm(self, x, f0_up_key, filter_radius):
         if not hasattr(self, "pm"):
             self.pm = PM(hop_length=160, sampling_rate=16000)
-        f0 = self.pm.compute_f0(x)
+        f0 = self.pm.compute_f0(x.cpu().numpy())
         return self._get_f0_post(f0, f0_up_key)
 
     def _get_f0_harvest(self, x, f0_up_key, filter_radius=3):
@@ -309,7 +308,7 @@ def _get_f0_harvest(self, x, f0_up_key, filter_radius=3):
                 self.sr,
             )
         if filter_radius is None: filter_radius=3
-        f0 = self.harvest.compute_f0(x, filter_radius=filter_radius)
+        f0 = self.harvest.compute_f0(x.cpu().numpy(), filter_radius=filter_radius)
         return self._get_f0_post(f0, f0_up_key)
 
     def _get_f0_dio(self, x, f0_up_key, filter_radius):
@@ -320,7 +319,7 @@ def _get_f0_dio(self, x, f0_up_key, filter_radius):
                 self.f0_max,
                 self.sr,
             )
-        f0 = self.dio.compute_f0(x)
+        f0 = self.dio.compute_f0(x.cpu().numpy())
         return self._get_f0_post(f0, f0_up_key)
 
     def _get_f0_crepe(self, x, f0_up_key, filter_radius):
diff --git a/rvc/layers/synthesizers.py b/rvc/layers/synthesizers.py
@@ -188,7 +188,7 @@ def infer(
             flow_head = head - 24
             if flow_head < 0: flow_head = 0
             dec_head = head - flow_head
-            m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths, head)
+            m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths, flow_head)
             z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
             z = self.flow(z_p, x_mask, g=g, reverse=True)
             z = z[:, :, dec_head : dec_head + length]