,但是最近我在ZX Spectrum上找到了一个非常老的TTS(详细信息见链接,其中还链接到原始tap文件存储库),它真的很好,也很简单(Z80 asm代码仅为801字节)。因此,我尝试了一下,将其反汇编(通过我自己的实用程序从tap文件中提取BASIC文件和asm,并使用YAZD进行了反汇编),并将结果成功移植到C++。在PC和MCU上听起来都不错,而所需的CPU算力却很少。它会产生1位数字声音。我完全不清楚它的原理...它是对样本的某种压缩,还是使用共振峰滤波器来合成声音,或是两者的组合,或是其他什么?
所以我想剖析
say_char
函数,弄清
tab_char?[]
这些LUT表的含义。[Edit2] 多亏了Edward,新增了更接近C/C++风格的版本:
我重新排列了表并添加了许多注释信息,使其更便于学习,也可以进行调整:
//---------------------------------------------------------------------------
//--- ZX Hlasovy program voicesoft 1985 -----------------------------------
//--- ported to C++ by Spektre ver: 1.001 -----------------------------------
//---------------------------------------------------------------------------
#ifndef _speech_h
#define _speech_h
//---------------------------------------------------------------------------
// API:
// Output hook that the user of this header must implement; the say_* functions
// below call it with the current 1-bit output state.
void sound_out(bool on); // you need to code this function (should add a sample to sound output)
// Speaks a whole text. An apostrophe after a letter selects that letter's
// diacritic variant.
void say_text(char *txt); // say null terminated text, "a'c'" -> "áè"
//---------------------------------------------------------------------------
// internals:
// Emits the sound of one character code ('A'..'Z', or 'A'+26..'Z'+26 for the
// diacritic variants); called by say_text.
void say_char(char chr); // internal function for single character (do not use it !!!)
// Calls sound_out ws times with the unchanged output state.
void say_wait(WORD ws); // internal wait (do not use it !!!)
//---------------------------------------------------------------------------
// vars:
// Current 1-bit output level. say_char updates it for every emitted bit and
// say_wait re-sends it unchanged for the duration of a pause.
// NOTE(review): this is a definition (not only a declaration) inside a header,
// so including the header from more than one translation unit would presumably
// give duplicate-definition link errors - confirm how the header is used.
bool _sound_on=false; // global state of the speaker/sound output
//---------------------------------------------------------------------------
// config: (recomputed for 44100 Hz samplerate)
// All values are counts of sound_out() calls.
const static BYTE t_speed=5; // [samples] 1/(speech speed) (pitch); sound_out calls per emitted bit in say_char
const static WORD t_pause=183; // [samples] pause between chars; say_char waits this long after every repetition of a bit run
const static WORD t_space=2925; // [samples] pause ` ` (used by say_text)
const static WORD t_comma=5851; // [samples] pause `,` (used by say_text)
//---------------------------------------------------------------------------
// tables:
// Per-character start offsets into tab_char1, indexed by (character code - 'A').
// Each value is the byte offset of the first 2-byte entry of that character's
// chain. Several characters share the same offset (e.g. 0xc2, which is the
// last 2-byte entry of tab_char1).
const static BYTE tab_char0[52]= // 0..25 normal alphabet A..Z
{ // 26..51 diacritic alphabet A..Z
0x00,0x02,0x06,0x0a,0x0e,0x10,0x12,0x16,0x1a,0x1c,0x22,0x26,0x2a,0x2e,0x32,
0x34,0x38,0x42,0x48,0x4a,0x4e,0x50,0x50,0x56,0x1a,0x5c,0x64,0x66,0x70,0x74,
0x7a,0x7c,0xc2,0x84,0x86,0xc2,0xc2,0xc2,0x88,0x8c,0x92,0x94,0xc2,0x9e,0xa6,
0xa8,0xae,0xb0,0xc2,0xc2,0x86,0xbc
};
// Chain entries, two bytes each, as decoded by say_char:
//   byte 0, bits 7..5 : index into tab_char3 (bit-run length); 0 = emit no bits
//   byte 0, bits 3..0 : repeat count for this entry
//   byte 1, bit  7    : set on the last entry of a character's chain
//   byte 1, bits 6..0 : upper 7 bits of the start byte index in tab_char2; the
//                       lowest index bit is taken from bit 5 of byte 0
const static BYTE tab_char1[196]=
{
0x36,0x81,0x34,0x19,0x31,0xab,0x18,0x19,0x91,0xc3,0x34,0x19,0x31,0xe0,0x36,
0x84,0x92,0xe3,0x35,0x19,0x51,0x9c,0x31,0x31,0x34,0x96,0x36,0x87,0x33,0x3a,
0x32,0x3d,0x32,0xc0,0x18,0x19,0x51,0x9c,0x33,0x22,0x31,0xb1,0x31,0x31,0x36,
0xa5,0x31,0x31,0x36,0xa8,0x36,0x8a,0x18,0x19,0x31,0xab,0x18,0x19,0x51,0x1c,
0x34,0x31,0x32,0x34,0x32,0xb7,0x22,0x10,0x13,0x19,0x21,0xae,0x92,0xc3,0x18,
0x19,0x31,0xe0,0x36,0x8d,0x34,0x31,0x32,0x34,0x32,0xb7,0x18,0x19,0x71,0x1c,
0x92,0xc3,0x32,0x31,0x32,0x43,0x32,0x44,0x32,0xc5,0x3f,0x81,0x34,0x19,0x31,
0x2b,0x33,0x3a,0x32,0x3d,0x32,0xc0,0x18,0x19,0x91,0xd3,0x33,0x19,0x71,0x6d,
0x32,0x93,0x3e,0x84,0x92,0x63,0x33,0x3a,0x32,0x3d,0x32,0xc0,0x92,0xf3,0x3e,
0x87,0x31,0x31,0x36,0x25,0x31,0x31,0x35,0x25,0x32,0x93,0x3e,0x8a,0x18,0x19,
0x31,0x2b,0x33,0x3a,0x32,0x3d,0x32,0xc0,0x13,0x19,0x32,0x60,0x13,0x19,0x71,
0xdd,0x92,0xd3,0x18,0x19,0x71,0x6d,0x32,0x93,0x3e,0x8d,0x34,0x31,0x32,0x34,
0x32,0x37,0x33,0x3a,0x32,0x3d,0x32,0xc0,0x32,0x53,0x32,0x54,0x32,0xd5,0x1a,
0x99
};
// Packed 1-bit sound data. say_char reads it as a bit stream, most significant
// bit of each byte first, starting at the byte index decoded from a tab_char1
// entry; each bit becomes the output level for t_speed sound_out() calls.
// The table is longer than 256 bytes, so a byte index into it needs more than
// 8 bits once a run advances past byte 255.
const static BYTE tab_char2[262]=
{
0x1a,0x99,0xe1,0xc3,0xe1,0xc7,0x8f,0x0f,0xf8,0x03,0x0f,0x07,0xc1,0xe3,0xff,
0x40,0x17,0xff,0x00,0x03,0xf8,0x7c,0xc1,0xf1,0xf8,0x03,0xfe,0x00,0x7f,0xfc,
0x00,0x03,0xf8,0x0f,0x09,0xf1,0xfe,0x03,0xef,0x40,0x17,0xff,0x00,0x03,0xe1,
0x5c,0x35,0xc5,0xaa,0x35,0x00,0x00,0x00,0x00,0x00,0x00,0x3e,0x8e,0x38,0x73,
0xcf,0xf8,0x78,0xc3,0xdf,0x1c,0xf1,0xc7,0xfe,0x03,0xc0,0xff,0x00,0x00,0xff,
0xf8,0x00,0x7f,0xf8,0x03,0xff,0xf0,0x01,0xff,0xe0,0x03,0xaa,0xca,0x5a,0xd5,
0x21,0x3d,0xfe,0x1f,0xf8,0x00,0x00,0x1f,0xff,0xfc,0x20,0x00,0x00,0x03,0xff,
0xff,0x08,0x79,0x00,0x02,0xff,0xe1,0xc7,0x1f,0xe0,0x03,0xff,0xd0,0x01,0xff,
0xf0,0x03,0x7f,0x01,0xfa,0x5f,0xc0,0x07,0xf8,0x0f,0xc0,0xff,0x00,0x42,0xaa,
0xa5,0x55,0x5a,0xaa,0xaa,0x5a,0xa5,0x5a,0xaa,0x55,0x55,0xaa,0xaa,0xa5,0x55,
0xaa,0x5a,0xaa,0xa5,0x55,0xaa,0xaa,0xa5,0x55,0xaa,0xaa,0x55,0xa5,0xa5,0xaa,
0xa5,0xb7,0x66,0x6c,0xd8,0xf9,0xb3,0x6c,0xad,0x37,0x37,0x66,0xfc,0x9b,0x87,
0xf6,0xc0,0xd3,0xb6,0x60,0xf7,0xf7,0x3e,0x4d,0xfb,0xfe,0x5d,0xb7,0xde,0x46,
0xf6,0x96,0xb4,0x4f,0xaa,0xa9,0x55,0xaa,0xaa,0xa5,0x69,0x59,0x9a,0x6a,0x95,
0x55,0x95,0x55,0x6a,0xa5,0x55,0xa9,0x4d,0x66,0x6a,0x92,0xec,0xa5,0x55,0xd2,
0x96,0x55,0xa2,0xba,0xcd,0x00,0x66,0x99,0xcc,0x67,0x31,0x8e,0x66,0x39,0xa6,
0x6b,0x19,0x66,0x59,0xc6,0x71,0x09,0x67,0x19,0xcb,0x01,0x71,0xcc,0x73,0x19,
0x99,0xcc,0xc6,0x67,0x19,0x9a,0xc6,
};
// Bit-run lengths selected by the 3-bit field in bits 7..5 of a tab_char1
// entry's first byte. say_char skips the lookup when that field is 0, so
// element 0 is never read. Only 5 elements exist although the field can hold
// 0..7; the lookup is not range-checked.
const static BYTE tab_char3[5]={ 0x00,0x2e,0x5a,0x5e,0xfe };
//---------------------------------------------------------------------------
void say_text(char *txt)
{
WORD hl;
BYTE a,b,c;
for (b=0xBB,hl=0;;hl++) // process txt
{
a=b; // a,c char from last iteration
c=b;
if (!a) break; // end of txt
b=txt[hl]; // b actual char
if ((b>='a')&&(b<='z')) b=b+'A'-'a'; // must be uppercase
a=c;
if ((a>='A')&&(a<='Z'))
{
// handle diacritic
if (a!='C'){ a=b; if (a!='\'') a=c; else{ a=c; a+=0x1A; b=0xBB; }}
else{
a=b;
if (a=='H'){ a+=0x1A; b=0xBB; }
else{ if (a!='\'') a=c; else{ a=c; a+=0x1A; b=0xBB; }}
}
// syntetize sound
say_char(a);
continue;
}
if (a==',')say_wait(t_comma);
if (a==' ')say_wait(t_space);
}
}
//----------------------------------------------------------------------
// Holds the output at its current level for ws samples by calling
// sound_out(_sound_on) ws times; _sound_on itself is not modified.
void say_wait(WORD ws)
{
    while (ws)
    {
        sound_out(_sound_on);
        ws--;
    }
}
//----------------------------------------------------------------------
// Synthesizes one character.
// Starting at tab_char1[tab_char0[chr-'A']], processes 2-byte chain entries
// until one with the end flag (bit 7 of its second byte) has been played.
// For every entry a run of bits from tab_char2 is sent to sound_out (each bit
// held for t_speed samples), followed by a pause of t_pause samples; this is
// repeated as many times as the entry's count nibble says.
//   chr : character code 'A'..'Z' or 'A'+26..'Z'+26 (diacritic variants);
//         chr-'A' indexes tab_char0 and is not range-checked here.
// Variable names (a,b,c,hl,cy) presumably follow the Z80 registers/carry flag
// of the disassembled routine this was ported from.
void say_char(char chr) // chr = < `A` , `Z`+26 >
{
WORD hl,hl0;
BYTE a,b,c,cy,cy0,ws;
hl=tab_char0[chr-'A']; // offset of the first chain entry of this character
for (;;)
{
// c = repeat count (low nibble of byte 0) | end flag (bit 7 of byte 1)
c =tab_char1[hl ]&0x0F;
c|=tab_char1[hl+1]&0x80;
for (;;)
{
a=tab_char1[hl];
a=(a>>5)&7; // 3-bit index into tab_char3
cy=a&1; // lowest bit of that index = bit 5 of byte 0; becomes bit 0 of the tab_char2 index below
hl0=hl; // remember the entry position, hl is reused as tab_char2 index
if (a!=0) // index 0 = no bits to emit, only the pause
{
b=tab_char3[a]; // number of bits left to emit
hl=hl0;
a=tab_char1[hl+1];
hl0=hl;
cy0=(a>>7)&1;
a=((a<<1)&254)|cy; // rotate left through carry: drops the end flag, shifts in cy
cy=cy0;
hl=a; // start byte index in tab_char2
a=0x80; // bit mask, most significant bit first
for (;;)
{
_sound_on=(a&tab_char2[hl]); // current bit -> output level
for (ws=t_speed;ws;ws--) sound_out(_sound_on); // hold the bit for t_speed samples
b--;
if (!b) break; // all bits of this run emitted
cy=a&1;
a=((a>>1)&127)|(cy<<7); // rotate the mask right by one bit
if (!cy) continue; // still inside the same byte
hl++; // mask wrapped around to 0x80 -> next byte of tab_char2
}
}
a^=a; // a=0; the value is overwritten below before it is read
say_wait(t_pause); // pause after every repetition
c--;
a=c&0x0F;
hl=hl0; // back to the chain entry
if (a==0) break; // all repetitions of this entry done
}
cy0=(c>>7)&1; // end flag of the entry just played
a=((c<<1)&254)|cy; // a is not read again after this point
cy=cy0;
if (cy) return; // last entry of the chain
hl+=2; // next 2-byte chain entry
}
}
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
#1 楼
我完全听不懂Hlasový program合成的语音,但也许它适合您的需求。我对这个特定软件没有具体的了解,但是根据其发布时间和大小,它几乎毫无疑问是基于共振峰的系统。那个年代8位计算机上的典型软件先做文本到音素的转换,再做音素到共振峰的转换。另一个例子是现在已被人移植到Javascript的“Software Automatic Mouth”(SAM)。可以从那里获取更多链接,包括逆向工程得到的C代码。程序
,这里是对您逆向工程得到的软件的进一步分析。我将告诉您我是如何做到的以及如何显示结果。首先,我开始研究最内层的循环,并逐步重写了它,每次都测试结果,以确保在每个步骤中产生相同的结果。然后,我对函数中越来越大的部分重复了这一过程。我还重命名并添加了变量,以使它们更好地反映软件实际使用它们的方式。尽管Z80可以使用的寄存器受到限制(以及这些寄存器可以使用的功能),但在C++中我们没有相同的限制,因此为清楚起见,代码被重写。这是解释。首先,我重命名了结构:
// Synthesizes one character by walking its chain of Chain entries.
//   chr : character code 'A'..'Z' or 'A'+26..'Z'+26 (diacritic variants);
//         chr-'A' indexes chain_start and is not range-checked here.
// For every chain entry the bit run it describes is emitted `copies` times,
// each repetition followed by a pause of t_pause samples; the entry whose end
// flag is set is the last one played.
void say_char(char chr) // chr = < `A` , `Z`+26 >
{
    const Chain *chain = &chain_sequence[chain_start[chr - 'A']];
    for (BYTE c=0; (c & 0x80) == 0; ++chain) {
        // count is in low four bits of c, end flag is high bit
        for (c = chain->copies_and_end(); c & 0xf; --c) {
            BYTE a = chain->numbits_lookup();
            if (a != 0) {
                BYTE bitcount = num_bits[a];
                // The index must be wider than BYTE: start_index() can be close
                // to 255 and a run can advance beyond byte 255 of sound_bits,
                // where a (presumably 8-bit) BYTE index would wrap back to 0
                // and play the wrong data.
                unsigned bitloc = chain->start_index();
                // bitcount is the number of bits to emit
                // starting with the MSB of sound_bits[bitloc]
                for ( ;bitcount; ++bitloc) {
                    for (BYTE mask = 0x80; mask; mask >>= 1) {
                        _sound_on = (mask & sound_bits[bitloc]);
                        // hold every bit for t_speed samples
                        for (BYTE ws = t_speed; ws; ws--)
                            sound_out(_sound_on);
                        if (--bitcount == 0)
                            break;
                    }
                }
            }
            say_wait(t_pause);
        }
    }
}
然后我修改了
chain_sequence
,使其改用两字节的C ++结构。定义是这样的:tab_char0 --> chain_start
tab_char1 --> chain_sequence
tab_char2 --> sound_bits
tab_char3 --> num_bits
由于此更改,我不得不修改
chain_start
表以将每个条目减半。如何工作
对于每个字母,代码从在
chain_start
表中查找开始。这是chain_sequence
表的索引。如果我们选择该表中的前三个条目,它们将如下所示:struct Chain {
    // One two-byte chain entry. The accessors below decode the packed fields.
    // bits: 7 6 5 4 3 2 1 0
    BYTE a; // m2 m1 c0 - l3 l2 l1 l0
    BYTE b; // end | c7 c6 c5 c4 c3 c2 c1
    // True when bit 7 of b is set.
    bool end() const { return b & 0x80; }
    // Low four bits of a.
    BYTE copies() const { return a & 0x0F; }
    // Low seven bits of b shifted left by one, with bit 5 of a as the lowest
    // bit. Bit 5 of a is also the lowest bit of numbits_lookup().
    BYTE start_index() const { return ((b & 0x7f) << 1) | ((a & 0x20) >> 5); }
    // copies() in the low nibble combined with the end flag in bit 7.
    BYTE copies_and_end() const {
        return (a & 0x0F) | (b & 0x80);
    }
    // Top three bits of a, as a value 0..7.
    BYTE numbits_lookup() const {
        return (a >> 5) & 7;
    }
    // Prints all decoded fields; "useless" is bit 4 of a, which none of the
    // accessors above read.
    friend std::ostream& operator<<(std::ostream& out, const Chain& ch) {
        return out
            << "copies = " << unsigned(ch.copies())
            << ", start_index = " << unsigned(ch.start_index())
            << ", numbits_lookup = " << unsigned(ch.numbits_lookup())
            << ", end = " << std::boolalpha << bool(ch.b & 0x80)
            << ", useless = " << bool(ch.a & 0x10);
    }
};
每一个都是一个链序列,最后一项由第二个字节的高位标识。对于字母“ A”,它翻译为:
const static Chain chain_sequence[98] = {
/* A = 0 */ { 0x36, 0x81, },
/* B = 1 */ { 0x34, 0x19, }, { 0x31, 0xab, },
/* C = 3 */ { 0x18, 0x19, }, { 0x91, 0xc3, },
那么,这意味着该代码创建了六个位模式副本。每个副本以
t_pause
零位结尾。对于每个副本的开始位,代码使用numbits_lookup
值在5字节num_bits
中查找所需的长度。因此,对于“ A”,查找为1,对应于0x2e = 46,但是代码编写的方式实际上对应于实际发出的少一位,或者在这种情况下为45。接下来,它使用
start_index
作为sound_bits
的索引。然后,将表中的每个字节从每个字节的最高有效位开始移出。因此,在这种情况下,索引3和45位的长度对应于表中的这些条目:因此,这样做的结果是输出对应于此的六个副本:如果仔细观察,将不会使用我所说的
Chain
中的一位(第一个字节的位4),而其他一位被使用两次(第一个字节的位5)。确实,我反汇编了原始的Z80代码并发现了这一点:copies = 6, start_index = 3, numbits_lookup = 1, end = true
您的代码似乎暗示调用我标记为
start_index()
的东西时进位位已置位,但更接近相关的rla
指令创建sound_bits
索引字节后,进位位保证为零。如上所述,加法指令不会溢出,因此会清除进位位。从那里到rla
指令都没有一条指令会改变进位位,因此此时该位为零。 >似乎没有很多重叠的数据,但是可能有。字母之一的链序列被重新使用。我尚未解码此处使用的实际变音符号,但如果将后26个字母指定为A'至Z',则M'的字母将从索引68开始,包括5个链段。 N'的一个使用这些段中的最后三个。对于同一元音的短和长版本,例如A和A'(带有čárka的A表示捷克语中的长元音),当前代码也重复链令牌,但序列更长。可以将它们组合在一起并使用单个位标志来表示元音。
在16位计算机上,通过重组数据可以使其效率更高。也可以将其修改为在嵌入式系统上由事件驱动。例如,这可以由计时器中断来中断驱动。或者可以创建一个样本队列,然后使用DMA传输将其发送给扬声器。
物理
这是通过一系列位(最低45)来创建最低频率,然后
sound_bits
零。较高的频率在每个副本的前导位模式内创建。不出所料,这是一种基于共振峰的合成器,具有较低的分辨率。
评论
不错的链接,是的,这些都是使用三角函数的标准共振峰合成...但是我看到的ZX TTS引擎仅使用位掩码、移位和LUT(甚至没有整数算术),任何地方都没有三角函数运算(顺便说一句,它们的实现方式与普通算术不同)。我对它背后的原理/技巧感兴趣...也许是某种简化,因为输出只是1位数字信号而不是DAC...顺便说一句,Hlasovy program是面向斯拉夫语言的,需要按发音拼写来输入文本(也许这就是您听不懂它的原因)
– Spektre
12月15日19:44
如果有时间,我会更深入地研究并更新我的答案。
–爱德华
12月15日19:52
它确实像您描述的那样工作。我正在把表和代码重写成更接近C++的形式,并使其可以调整...完成后,我会将其添加到我的问题中。顺便说一句,看起来我猜对了:它是对共振峰基音的PCM采样做了类似RLE的压缩 :) 工作非常出色,如果可以的话我会多投几票...而且我认为您对RLA的cy行为所做的修正听起来还不错...我重组了2张表,...
– Spektre
12月17日19:21
好的,在您理解表格功能之后,我添加了我的重新编码语音版本。非常感谢您的辛勤工作。
– Spektre
12月17日20:23
我对所有表进行了完全的重做,从而对代码进行了一些更新,也摆脱了位表,并解剖了很多PCM样品...
– Spektre
12月18日12:47