ESpeakEngine - Objective-C speech synthesizer

Jozef Bozek

Rate me:

4.80/5 (3 votes)

23 Jan 2012 | BSD | 2 min read

73.7K

ESpeakEngine - Objective-C speech synthesizer

ESpeakTest.zip
- __MACOSX
  - ESpeakTest
    - ._.DS_Store
    - .git
      - info
        
        ._exclude
    - ESpeakTest
      - ._AppDelegate.h
      - ._AppDelegate.m
      - ._ESpeakTest-Prefix.pch
      - ._main.m
      - ._ViewController.h
      - ._ViewController.m
      - en.lproj
        
        ._InfoPlist.strings
        
        ._ViewController_iPad.xib
    - ESpeakTestTests
      - ._ESpeakTestTests.h
      - ._ESpeakTestTests.m
      - en.lproj
        
        ._InfoPlist.strings
- ESpeakTest
  - .DS_Store
  - .git
    - branches
    - COMMIT_EDITMSG
    - config
    - description
    - HEAD
    - hooks
      - applypatch-msg.sample
      - commit-msg.sample
      - post-commit.sample
      - post-receive.sample
      - post-update.sample
      - pre-applypatch.sample
      - pre-commit.sample
      - prepare-commit-msg.sample
      - pre-rebase.sample
      - update.sample
    - index
    - info
      - exclude
    - logs
      - HEAD
      - refs
        
        heads
        
        master
    - objects
      - 00
        
        e88867c94b8f6728422a4ad099a53a9f392cb7
      - 01
        
        477be5465c04d4cfd46ab95003f17445731d74
      - 02
        
        37597b848a1890bb30ba0ff4102f8107cafdcb
        
        6363f6a3bb9fe78b03b09f4d0fdce472b95e20
        
        69a98a29eb1baf79dd1f6f2bbc9370b9c4c9cb
        
        78ea2136d97866104e3789d7950d39632b5046
      - 03
        
        7b0f820ace631b85715dfb13cc21fe26daa44d
        
        dac4f6baff6f5a2b06f5a68b6daa265c16e0a2
        
        edde41daf54530ac1fe426e9349a5544b48ccc
      - 05
        
        b2d0d21b687b4190600f6ea0206972c960dd1c
      - 06
        
        23379b22e1417ed806ff4c48337afbe9457cdf
      - 0c
        
        2d13a65548266d9c8a6758599c9a86faf53b28
      - 0d
        
        51695daffab058d065e942018f5c56067942fe
        
        6fa2a91273ee3271fb1d3202d7f6ad86379ef7
      - 10
        
        e91b22ea1e6c67962cd858e8d03de74e7abf6f
      - 11
        
        8095eb5859b9c2ee14be00adeb87ee66eaec94
      - 12
        
        83826f9250bed7326aed2542a0e3c5b47edbec
        
        ce1096a4ef6c4538a43ebd951dcaf2b624b717
      - 13
        
        664a34fe6a3b585603e00df15b17bcef3591e0
        
        93b3551c88468dccede7550726aae27a931555
      - 14
        
        1160885f6955ee4855933ce984bc117649d220
        
        417c1fdfdb3af16ef717d90a078f5c9b8468c3
      - 15
        
        96e3c77205e1688ad5968359106b5ea92d322a
      - 17
        
        004519410f433308228397abc30d2b69e55b26
      - 19
        
        3700525eaa0aa02256a4211c820007bb005995
        
        da34a5b9b6e585ece12d4a6ec24341fa98f50c
        
        fd69d3bec6857b0d4b29f4f26c03ac0dc727e3
      - 1a
        
        61238c5c772724cb12490eb555630bcc0e7870
        
        9e53b6e584bd1e8245f3fc69751211b7d4e86a
      - 1c
        
        2992dd5455746aef85905cc0293031e3e9ceaa
        
        a6f6abadb2e86be2baf2f823a27d16c43d2528
      - 1e
        
        9a757f891fd7ece2ff350dc9225837cb7e70f7
      - 1f
        
        2eb929019174a9b4c2654c1398ed26aca50e2c
      - 20
        
        5e59c21f973ea13de6deab139efee2f171027a
      - 21
        
        4d6f9d708a7691c1abf13b0801ee9a54a44ae6
      - 22
        
        a3b80f6954fd663b59f66210e7b658c5db3a3f
        
        a95d18a3ead5d0274abab7986121dccafc1842
      - 23
        
        8c69126e18387f9c2b3d059db6d7df1cd91a6f
      - 25
        
        55d5d8bec5ee0524034d96ea5aa95eeb1a90dd
        
        5fe35ca3bf7c8743abf0f95518aae822aac4f0
        
        bfdbf47ea384bd8118ed197a8842cb2104eb2a
        
        d78c05ca841d2019f204184927282dc69ebd61
      - 28
        
        59e093f75f1f88ca412e0bde9345afc01f15ac
      - 29
        
        91e99d136fe8e13f7c0c99eaa27c8ca397186d
      - 2a
        
        736d4d3f3c3ffe36bea620d6c28b672efc5867
        
        fb1adc71b7f4e90bd02355dd8cdb75e8d23870
      - 2e
        
        1534872410e5819a69396cdbb57225baa3a888
        
        9dd1f58881b69e931f3b5a73fef461eb9b838b
      - 30
        
        441d7e239ccd9169d15b04efc59be7a158a4a5
        
        dea89200864b8e84dc0fec67850491d460a78d
      - 31
        
        02c9a1a9a5db99cadf90502de610f69c994913
        
        bd479a3d5873c8a970de821a84d2addc864839
      - 32
        
        49dd4f8012e94af03784f043478ebab38fd0b7
      - 33
        
        00c360065de4a18e093dd02df3f6419b656f74
        
        430a22c186f11e5cf838e81dacb386a7e4c8d0
        
        8b8c450f3027ea5d3751fc1e0b4bf969cc1afc
      - 35
        
        a4a3fc517a71ec8bad8f9d0c71bd9da960c494
      - 36
        
        665314753a98e11162485805608fe1aca7bc1e
        
        669d3f0592eaeb8465b81341b486a4c9c1eb11
        
        a4bff6503dfb82fcdc7a160cc36f4cdd9e333d
      - 3a
        
        f2aa1af75587b7a29685fa2dbbebbd2b3592c2
      - 3b
        
        105a7fb0f9b3bb28a87a7212dfcef08daf53c0
      - 3d
        
        d75260a68d0fb7b88fe70d0e3e73c916b963e5
      - 3e
        
        520ec5c8b88db8f95f6a230273278fd39bfbd6
      - 41
        
        22d96b39bf2bc1025a1c57ef01b8b03936773d
      - 42
        
        dbdc5586bc89d681d23851f4a5aafd79b414cb
        
        de58882d0c80e94597b575893afc8e99bd431e
      - 46
        
        07dd079dd2748f8e9cde034268e6cd68278644
        
        317618d912082d6070e4e3972809824bd35395
      - 47
        
        7b28ff8f86a3158a71c4934fbd3a2456717d7a
      - 4b
        
        6a9ae550599ea85d77fea7c0f71b4d5aaf1ba8
      - 4c
        
        6239268d21312d311a504ad0d8aeb0e4f1030b
      - 4d
        
        ec15997c4e00c7a764156687431ac8c47676a5
      - 4e
        
        2b9d23e84059b93883e1a0c0f7a859a23b087e
      - 4f
        
        1904e5164410f93689beb55d46901b214271fe
        
        8f5e88f436d478b126c5c4eccf3568e398c26f
        
        e4188e53b10cc21b50c3bf47e9ef3b2fc4c641
      - 50
        
        1b5a4a8620d5bd9a545c8941cdcbd565fca1c6
      - 52
        
        1be164ce6c87e1d6df58fd82ab160c8f6255db
        
        692c38546eb82aee2a7550c93798f70f02dc9a
        
        c5ac93561331143a9caea14d6c0f008216b4e9
      - 53
        
        6957cb8fe03bde580784e6f97537ec3444e9c3
        
        7beb3ba82da8af147f028685e61fc839cad713
        
        c2a70482993f53d6df321687b5cc4d9e95abc1
        
        cb31446e077a5f159c831e126a0e3f9a2d0ed2
      - 56
        
        9f9d05432267a13b75ca2562beb85e1e1c17db
      - 58
        
        1cd883fed6aa3b84b580a543b6ec8998f4d327
      - 5a
        
        24e11ad4dc2842c79033ad323f02e2e6c2f566
        
        85640ee385afef9b9dc9b7d2889d47b81aa1bd
      - 5c
        
        3583da4700ab6982766d187d1195b37f9a3fb5
      - 5e
        
        bb6a35716f489f249db8bb0e9df7dde150eba4
      - 5f
        
        3297d3a2009a6051a8ebc606bc674056ea03fc
      - 61
        
        124ff298a7d392b816cb1a71095ed1ec8ffe6b
      - 65
        
        3c3f5c4a2d2e44b8a188b88b64278085ebde27
      - 68
        
        1cad9627cb3af687a30507f05114c89ef9340f
      - 69
        
        cceefb779a5236074db6871d7523d92f8a709f
      - 6a
        
        8d5efd87553a3f0e977636c6b819cddf3a99e7
        
        cadba6aea97cd920745428a1c4ccd998581cc7
      - 6c
        
        65e3c6851f204d9c4cd9b616b46a9ff425b3c2
      - 6d
        
        826477b6ee1ecb3e502ffe3c26387d1bce3961
      - 6e
        
        11c93121ab5d535e4f2d50253ee4a527694a9f
      - 71
        
        99341c34f93f5fa5219ff479e82edaee5d7936
        
        ecab7197ec9646efdae05bb02b465f5b5e361a
      - 73
        
        ac62a4ab12374bbf6f72539b2e104d10d7d394
      - 74
        
        00c07a5c17fbbead0d252a22f1fcdb6e5f15c4
        
        0601d129aa08fd59be839301c923b684361dbe
      - 79
        
        2d8a9f9ef248c4358c36000722ba0c53a76497
      - 7c
        
        bdab338114c51e83e0b54c67280b91872211d5
      - 7d
        
        276eb2b779d73c46d8ed97e4be287bec96c828
      - 7e
        
        6c16a2c28e97392d20d4f4c243ecd6f6f40a91
        
        f93a5edd61fddc97d982242d7654e5ab07a09e
      - 7f
        
        4631899e208f50b855ad579b726eadea70f67d
        
        fccbe6dd68c968e78da5b6265a13c62c1fc639
      - 82
        
        2c9a312addee71797811c17690f2be8746bea4
        
        98f98722e5f5f405e9631eb4a5064d87114424
      - 83
        
        71a46410d32c3f000db4c7b11254f48a3d6055
      - 84
        
        79e658ebe74cdd0b9525a41dededf8b9839858
        
        ccc3a6d4681e19dbe6b982ddcee2e760944d55
      - 85
        
        ebb03e3089c5055e4f76d272866738a90a7842
      - 88
        
        1634035cad7fbc213a9def0b5e5ef7f15dffa2
        
        48d6820e826b907349234a642535725247f837
      - 89
        
        28f0ff69aa677f7c5f96053ca70589552e17b0
        
        e6c82914aa9457a644d5a498fa643f98b9ade3
      - 8f
        
        c65d4bab0f132f34e70868a961188deabc55dc
        
        d4a63a1a24c8b25eab5ed28c135457a8332e6b
      - 92
        
        3d517415d489cc9b3f91638c14264dd0df55a3
        
        a1582817dc2f8256db5a02bed320a05f6e5e43
      - 95
        
        8799c61770d05ba341183cd2d6a107a1ec093c
      - 96
        
        35ac150af1804b398d67cf4703d718a16806a7
      - 97
        
        8ec49ed00c46862d89580efe68f7efd98c93c3
        
        a337d34bb9ab89812b5e79c3bc2bb784d48953
      - 98
        
        9f9eab7b5ee98f4b6acf35fe8b4ef86db3a62e
      - 9b
        
        06e0bd24aa4658ed8009be3e2fe7e32ccee54e
        
        280bf8bc106ca904c9b33a90d0822c4c9b03fd
        
        2d891f12030afadc1e737914a1759ab59ef01f
        
        a872a49ab896e3d7c6203bedfd2502d8cb6521
        
        c60c7cbd38db3307551ae17ef2a8a5d623b3a0
      - 9d
        
        e1630d90e22bf6df53a0093c212e9e1d7da9b6
        
        eba8432350a07d0ab15189bb124d48b836fb62
      - 9e
        
        9c4e7476f3dce3b6808b0c8ea917a4d8503d7f
      - a3
        
        72fb170d6218ccc7298335f1ea55ff674fb3cc
      - a5
        
        4cb02cd26c7ecc2bd08debb1cfe85c3546c039
        
        504505ef08ce6040091e48eb5c1653a34574b0
      - a6
        
        56d2c7f2e9509fef687b3174282f6ec9b61498
        
        e0f46bb57f876255cdb5767729aefccbe3a33e
      - a7
        
        a8223dbda4d4cd47ae8796ad2be9bc70e46754
      - a9
        
        09e1af21f9dc9dccf095d02920266ea11fe7f5
        
        0c7b72565da6e015014e343f10ae50197587c9
      - aa
        
        80edaad05f0f16c6195e72a0130d803b58bcbc
      - ae
        
        76a4c4deb77d53e7ee512c3e85f45ba802dd08
        
        9247d41055f6721d3500645539ee00ae29d7e4
        
        a3d895c09d5eae5411aec5fb5ecbe82451bd50
      - b0
        
        d4979c857d151ef5cd27248926fb112c0c3cee
      - b1
        
        a874be65ad7df189c933f4c576475f29d15f67
      - b2
        
        7a8114a03ed27348a40f37428107e02f67acc8
        
        fd9d084c6df70da314916ee674421872f9ef50
      - b3
        
        2b6a6660b12a14fc5e8749eff9ba696b7f6061
      - b4
        
        8b1788b979853cdd57bb6cec6b9cc4dd7925bc
      - b7
        
        39a86e488eeec6f5693b7ee2590f84d6c95191
      - b8
        
        519559d3db834193518d4f4c4fed777734c8ee
        
        6f59306a576f4548ff3bd62e344570cc538932
        
        f782946f4728a629eda8038e82dd165dadee1d
      - ba
        
        7c42cc48ae095ccfffeb03cf36d519d7cbe5a3
      - bc
        
        bb2a0058b7866064b779ae246244c5342a0ccb
      - bd
        
        336a9884877889e2f85ee1849f8a068d2c6279
      - be
        
        1b6246a01d3d9c01dac0f888454369e0b32892
      - c0
        
        a5475ece89c3274e62f9132560ec8afdd569d4
      - c2
        
        34f46877a761d5c98fda2cbbeb172dc2efc81a
        
        76bec0dd56b68a8e0ed0062b38dd149422b45d
      - c3
        
        2db3968df0779b3c3a47899085625a7b86339b
      - c6
        
        32e263c2a639311be4ca1512aadee97a91bc9a
      - cb
        
        c4fb3a5d3dcc07f26b5ca02eb53a52b897aa80
      - cd
        
        02abedb89d8975a792d73de536d2ce82260553
      - ce
        
        800f70be34d9fba11f7d09b0c329b3f259507b
      - cf
        
        584b7dd3f005be3cffb47d27f1bfe980a6e53b
        
        f13bac9dd46f2c2a2fb66db64ce78caffd5b5a
      - d0
        
        2b035cae9b12c66a5e0d5ae9cf57433041ed40
        
        864f3d6209ad6f3e8ab51dc880836e67c96fc5
        
        b729579de2537a63cebdb809e8bdfb5a897a33
      - d2
        
        5865608d7d973e77c77d13bbdbe3ae454d864d
      - d3
        
        d7720074a2fc7e3bfbc18cc1bacad0f9ea61d2
      - d5
        
        06e7f9f85667d054794f808f9098b2a1625372
        
        9fe79529970f627e83857f9bd516f5474bf685
      - d6
        
        737d787752a799349095b49cf81024dfd00fb6
        
        811d3ae4f710d3eb8a16c690cd42bebc4faf4e
      - d7
        
        59cfdf018f8814d995588d176dadde0820b5ca
      - d8
        
        ecd252c04bc00a6dcc6d0b84d64285726b65d4
        
        f35efaa2dc2293277365d811c9b6222cfe59b8
      - da
        
        c1e4d0643b01e99052f575b5f6add2e10a2143
        
        ef5160c9c1500d3a9f6b0eadf434ef702a009f
      - db
        
        63d6ca3d87629e693327a2ba4a719e79847b6c
        
        de212341a0b131224b3e123f91d154348070ba
      - dc
        
        51396ce24aad46d1c761e2c96f8aee68d4b622
      - de
        
        4786c94cfa90c84982ca06a2a9623035993dfc
      - df
        
        70f4387ca97d30c66d7dc87d5abca9b0ffa820
        
        7fa77bc23477530674f05f4f619803983d5eb0
      - e0
        
        783ec3b1e38ce0f77b5b018d9b8641a7b924c3
        
        ea6d6394ab8409463adfeb753f1632ef09a47f
      - e4
        
        16c6dc5e784243ad8d33000139178fa178de87
        
        1d3105c048e789fe605c780efe2881df833ea8
      - e7
        
        178e4d5666a3b75aa48de33b966f0590665a8e
      - e8
        
        5978c76abd7430436356cd2a126018642d6a2d
      - ea
        
        dd707322e08d90e3a2208aef9060706c340799
      - eb
        
        d92ffb3ec9499292d1faa7ca60adcc5edac74f
      - ed
        
        05f4240da98a569e3c9f9a5b9e2301d7caa79e
      - ee
        
        3bd233e2bc458aab63eb36f613f853f72e6bf7
      - f2
        
        033dc11fee0a1db4a2cae358c1a808149a5aa2
        
        130ba4f980783f8605eb50387ffc2e54f66b3c
      - f3
        
        10f868efea6408fc49fd4e6d6c293db2098114
        
        e97b52355ea6f5384790bb3cda677222be900c
      - f4
        
        3ef84fa6caf08bc6e9e442e15d71eaa8c6962a
        
        ede3296c4fb294a58715e61ecef26f74ed898a
      - f5
        
        0e89c905a3e8dd519f415585283757fc85d2d6
      - f9
        
        624b31623b500b57b74e9765ac7a2d9f039c65
      - fa
        
        4eece0b1cc7dababed3d09d560f38e00ea8755
      - fc
        
        377156b487a559efb384ec2b6e551d6c2085c6
        
        60f41672c8fb6fc69c1548552988f046a2aa14
      - fe
        
        7c4d8d002499fbaff530adfaa0543d626536dd
      - ff
        
        a94206372e46914dcadeb10c212a9674daf62e
      - info
      - pack
    - refs
      - heads
        
        master
      - tags
  - ESpeakTest.xcodeproj
    - project.pbxproj
    - project.xcworkspace
      - contents.xcworkspacedata
      - xcuserdata
        
        jozefbozek.xcuserdatad
        
        UserInterfaceState.xcuserstate
    - xcuserdata
      - jozefbozek.xcuserdatad
        
        xcdebugger
        
        Breakpoints.xcbkptlist
        
        xcschemes
        
        ESpeakTest.xcscheme
        
        xcschememanagement.plist
  - ESpeakTest
    - AppDelegate.h
    - AppDelegate.m
    - en.lproj
      - InfoPlist.strings
      - ViewController_iPad.xib
      - ViewController_iPhone.xib
    - ESpeakTest-Info.plist
    - ESpeakTest-Prefix.pch
    - main.m
    - ViewController.h
    - ViewController.m
  - ESpeakTestTests
    - en.lproj
      - InfoPlist.strings
    - ESpeakTestTests.h
    - ESpeakTestTests.m
    - ESpeakTestTests-Info.plist
eSpeak_1.0.zip
- eSpeak_1.0
  - ._eSpeak_Prefix.pch
  - ._TestApp-Info.plist
  - Classes
    - ._ESpeakEngine.h
    - ._ESpeakEngine.m
    - Libraries
      - ._compiledict.cpp
      - ._debug.cpp
      - ._debug.h
      - ._dictionary.cpp
      - ._espeak_command.cpp
      - ._espeak_command.h
      - ._espeak-data
      - ._event.cpp
      - ._event.h
      - ._fifo.cpp
      - ._fifo.h
      - ._intonation.cpp
      - ._klatt.cpp
      - ._klatt.h
      - ._mbrowrap.cpp
      - ._mbrowrap.h
      - ._numbers.cpp
      - ._phoneme.h
      - ._phonemelist.cpp
      - ._portaudio.h
      - ._portaudio18.h
      - ._portaudio19.h
      - ._readclause.cpp
      - ._setlengths.cpp
      - ._sintab.h
      - ._speak_lib.cpp
      - ._speak_lib.h
      - ._speech.h
      - ._StdAfx.h
      - ._synth_mbrola.cpp
      - ._synthdata.cpp
      - ._synthesize.cpp
      - ._synthesize.h
      - ._tr_languages.cpp
      - ._translate.cpp
      - ._translate.h
      - ._voice.h
      - ._voices.cpp
      - ._wave.cpp
      - ._wave.h
      - ._wave_pulse.cpp
      - ._wave_sada.cpp
      - ._wavegen.cpp
      - espeak-data
        
        ._af_dict
        
        ._ca_dict
        
        ._config
        
        ._cs_dict
        
        ._cy_dict
        
        ._da_dict
        
        ._de_dict
        
        ._el_dict
        
        ._en_dict
        
        ._eo_dict
        
        ._es_dict
        
        ._fi_dict
        
        ._fr_dict
        
        ._grc_dict
        
        ._hbs_dict
        
        ._hi_dict
        
        ._hu_dict
        
        ._hy_dict
        
        ._id_dict
        
        ._intonations
        
        ._is_dict
        
        ._it_dict
        
        ._jbo_dict
        
        ._ku_dict
        
        ._la_dict
        
        ._lv_dict
        
        ._mbrola
        
        ._mbrola_ph
        
        ._mk_dict
        
        ._nci_dict
        
        ._nl_dict
        
        ._no_dict
        
        ._pap_dict
        
        ._phondata
        
        ._phonindex
        
        ._phontab
        
        ._pl_dict
        
        ._pt_dict
        
        ._ro_dict
        
        ._ru_dict
        
        ._sk_dict
        
        ._soundicons
        
        ._sq_dict
        
        ._sv_dict
        
        ._sw_dict
        
        ._ta_dict
        
        ._tr_dict
        
        ._vi_dict
        
        ._voices
        
        ._zh_dict
        
        ._zhy_dict
        
        mbrola_ph
        
        ._af1_phtrans
        
        ._ca1_phtrans
        
        ._cr1_phtrans
        
        ._cs_phtrans
        
        ._de2_phtrans
        
        ._de4_phtrans
        
        ._de6_phtrans
        
        ._en1_phtrans
        
        ._es_phtrans
        
        ._fr1_phtrans
        
        ._gr2_phtrans
        
        ._grc-de6_phtrans
        
        ._hu1_phtrans
        
        ._ic1_phtrans
        
        ._id1_phtrans
        
        ._in1_phtrans
        
        ._it3_phtrans
        
        ._la1_phtrans
        
        ._nl_phtrans
        
        ._pl1_phtrans
        
        ._pt_phtrans
        
        ._pt1_phtrans
        
        ._ptbr_phtrans
        
        ._ptbr4_phtrans
        
        ._ro1_phtrans
        
        ._sv_phtrans
        
        ._sv2_phtrans
        
        ._tr1_phtrans
        
        ._us_phtrans
        
        ._us3_phtrans
        
        voices
        
        !v
        
        ._croak
        
        ._f1
        
        ._f2
        
        ._f3
        
        ._f4
        
        ._f5
        
        ._fast
        
        ._klatt
        
        ._klatt2
        
        ._klatt3
        
        ._m1
        
        ._m2
        
        ._m3
        
        ._m4
        
        ._m5
        
        ._m6
        
        ._m7
        
        ._whisper
        
        ._whisperf
        
        ._!v
        
        ._af
        
        ._bs
        
        ._ca
        
        ._cs
        
        ._cy
        
        ._da
        
        ._de
        
        ._default
        
        ._el
        
        ._en
        
        ._eo
        
        ._es
        
        ._es-la
        
        ._fi
        
        ._fr
        
        ._fr-be
        
        ._hi
        
        ._hr
        
        ._hu
        
        ._hy
        
        ._hy-west
        
        ._id
        
        ._is
        
        ._it
        
        ._ku
        
        ._la
        
        ._lv
        
        ._mb
        
        ._mk
        
        ._nl
        
        ._no
        
        ._pl
        
        ._pt
        
        ._pt-pt
        
        ._ro
        
        ._ru
        
        ._sk
        
        ._sq
        
        ._sr
        
        ._sv
        
        ._sw
        
        ._ta
        
        ._test
        
        ._tr
        
        ._vi
        
        ._zh
        
        ._zh-yue
        
        en
        
        ._en
        
        ._en-n
        
        ._en-rp
        
        ._en-sc
        
        ._en-us
        
        ._en-wi
        
        ._en-wm
        
        mb
        
        ._mb-af1
        
        ._mb-af1-en
        
        ._mb-br1
        
        ._mb-br3
        
        ._mb-br4
        
        ._mb-cr1
        
        ._mb-cz2
        
        ._mb-de2
        
        ._mb-de4
        
        ._mb-de4-en
        
        ._mb-de5
        
        ._mb-de5-en
        
        ._mb-de6
        
        ._mb-de6-grc
        
        ._mb-de7
        
        ._mb-en1
        
        ._mb-es1
        
        ._mb-es2
        
        ._mb-fr1
        
        ._mb-fr1-en
        
        ._mb-fr4
        
        ._mb-fr4-en
        
        ._mb-gr2
        
        ._mb-gr2-en
        
        ._mb-hu1
        
        ._mb-hu1-en
        
        ._mb-ic1
        
        ._mb-id1
        
        ._mb-it3
        
        ._mb-it4
        
        ._mb-la1
        
        ._mb-nl2
        
        ._mb-nl2-en
        
        ._mb-pl1
        
        ._mb-pl1-en
        
        ._mb-pt1
        
        ._mb-ro1
        
        ._mb-ro1-en
        
        ._mb-sw1
        
        ._mb-sw1-en
        
        ._mb-sw2
        
        ._mb-sw2-en
        
        ._mb-tr1
        
        ._mb-tr2
        
        ._mb-us1
        
        ._mb-us2
        
        ._mb-us3
        
        test
        
        ._grc
        
        ._jbo
        
        ._nci
        
        ._pap
    - Tests
- eSpeak_1.0
  - Classes
    - ESpeakEngine.h
    - ESpeakEngine.m
  - eSpeak.xcodeproj
    - jBozEk.mode1v3
    - jBozEk.pbxuser
    - project.pbxproj
    - project.xcworkspace
      - contents.xcworkspacedata
      - xcuserdata
        
        jBozEk.xcuserdatad
        
        UserInterfaceState.xcuserstate
        
        jozefbozek.xcuserdatad
        
        UserInterfaceState.xcuserstate
        
        WorkspaceSettings.xcsettings
    - xcuserdata
      - jBozEk.xcuserdatad
        
        xcschemes
        
        xcschememanagement.plist
      - jozefbozek.xcuserdatad
        
        xcdebugger
        
        Breakpoints.xcbkptlist
        
        xcschemes
        
        eSpeak.xcscheme
        
        TestApp.xcscheme
        
        xcschememanagement.plist
  - eSpeak_Prefix.pch
  - Libraries
    - compiledict.cpp
    - debug.cpp
    - debug.h
    - dictionary.cpp
    - espeak_command.cpp
    - espeak_command.h
    - espeak-data
      - af_dict
      - ca_dict
      - config
      - cs_dict
      - cy_dict
      - da_dict
      - de_dict
      - el_dict
      - en_dict
      - eo_dict
      - es_dict
      - fi_dict
      - fr_dict
      - grc_dict
      - hbs_dict
      - hi_dict
      - hu_dict
      - hy_dict
      - id_dict
      - intonations
      - is_dict
      - it_dict
      - jbo_dict
      - ku_dict
      - la_dict
      - lv_dict
      - mbrola
      - mbrola_ph
        
        af1_phtrans
        
        ca1_phtrans
        
        cr1_phtrans
        
        cs_phtrans
        
        de2_phtrans
        
        de4_phtrans
        
        de6_phtrans
        
        en1_phtrans
        
        es_phtrans
        
        fr1_phtrans
        
        gr2_phtrans
        
        grc-de6_phtrans
        
        hu1_phtrans
        
        ic1_phtrans
        
        id1_phtrans
        
        in1_phtrans
        
        it3_phtrans
        
        la1_phtrans
        
        nl_phtrans
        
        pl1_phtrans
        
        pt_phtrans
        
        pt1_phtrans
        
        ptbr_phtrans
        
        ptbr4_phtrans
        
        ro1_phtrans
        
        sv_phtrans
        
        sv2_phtrans
        
        tr1_phtrans
        
        us_phtrans
        
        us3_phtrans
      - mk_dict
      - nci_dict
      - nl_dict
      - no_dict
      - pap_dict
      - phondata
      - phonindex
      - phontab
      - pl_dict
      - pt_dict
      - ro_dict
      - ru_dict
      - sk_dict
      - soundicons
      - sq_dict
      - sv_dict
      - sw_dict
      - ta_dict
      - tr_dict
      - vi_dict
      - voices
        
        !v
        
        croak
        
        f1
        
        f2
        
        f3
        
        f4
        
        f5
        
        fast
        
        klatt
        
        klatt2
        
        klatt3
        
        m1
        
        m2
        
        m3
        
        m4
        
        m5
        
        m6
        
        m7
        
        whisper
        
        whisperf
        
        af
        
        bs
        
        ca
        
        cs
        
        cy
        
        da
        
        de
        
        default
        
        el
        
        en
        
        en
        
        en-n
        
        en-rp
        
        en-sc
        
        en-us
        
        en-wi
        
        en-wm
        
        eo
        
        es
        
        es-la
        
        fi
        
        fr
        
        fr-be
        
        hi
        
        hr
        
        hu
        
        hy
        
        hy-west
        
        id
        
        is
        
        it
        
        ku
        
        la
        
        lv
        
        mb
        
        mb-af1
        
        mb-af1-en
        
        mb-br1
        
        mb-br3
        
        mb-br4
        
        mb-cr1
        
        mb-cz2
        
        mb-de2
        
        mb-de4
        
        mb-de4-en
        
        mb-de5
        
        mb-de5-en
        
        mb-de6
        
        mb-de6-grc
        
        mb-de7
        
        mb-en1
        
        mb-es1
        
        mb-es2
        
        mb-fr1
        
        mb-fr1-en
        
        mb-fr4
        
        mb-fr4-en
        
        mb-gr2
        
        mb-gr2-en
        
        mb-hu1
        
        mb-hu1-en
        
        mb-ic1
        
        mb-id1
        
        mb-it3
        
        mb-it4
        
        mb-la1
        
        mb-nl2
        
        mb-nl2-en
        
        mb-pl1
        
        mb-pl1-en
        
        mb-pt1
        
        mb-ro1
        
        mb-ro1-en
        
        mb-sw1
        
        mb-sw1-en
        
        mb-sw2
        
        mb-sw2-en
        
        mb-tr1
        
        mb-tr2
        
        mb-us1
        
        mb-us2
        
        mb-us3
        
        mk
        
        nl
        
        no
        
        pl
        
        pt
        
        pt-pt
        
        ro
        
        ru
        
        sk
        
        sq
        
        sr
        
        sv
        
        sw
        
        ta
        
        test
        
        grc
        
        jbo
        
        nci
        
        pap
        
        tr
        
        vi
        
        zh
        
        zh-yue
      - zh_dict
      - zhy_dict
    - event.cpp
    - event.h
    - fifo.cpp
    - fifo.h
    - intonation.cpp
    - klatt.cpp
    - klatt.h
    - mbrowrap.cpp
    - mbrowrap.h
    - numbers.cpp
    - phoneme.h
    - phonemelist.cpp
    - portaudio.h
    - portaudio18.h
    - portaudio19.h
    - readclause.cpp
    - setlengths.cpp
    - sintab.h
    - speak_lib.cpp
    - speak_lib.h
    - speech.h
    - StdAfx.h
    - synth_mbrola.cpp
    - synthdata.cpp
    - synthesize.cpp
    - synthesize.h
    - tr_languages.cpp
    - translate.cpp
    - translate.h
    - voice.h
    - voices.cpp
    - wave.cpp
    - wave.h
    - wave_pulse.cpp
    - wave_sada.cpp
    - wavegen.cpp
  - TestApp-Info.plist
  - Tests

/***************************************************************************
 *   Copyright (C) 2005 to 2010 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/

#include "StdAfx.h"

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>

#include "speak_lib.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "translate.h"

//#define OPT_FORMAT         // format the text and write formatted copy to Log file 
//#define OUTPUT_FORMAT

// Prototypes for functions whose definitions are not in this part of the file.
extern void Write4Bytes(FILE *f, int value);
int HashDictionary(const char *string);

static FILE *f_log = NULL;   // starts as NULL; NOTE(review): presumably set once a log file is opened - confirm
extern char *dir_dictionary;

extern char word_phonemes[N_WORD_PHONEMES];    // a word translated into phoneme codes

// File-local state.
// NOTE(review): the purposes noted below are inferred from the names only - confirm at the points of use.
static int linenum;                      // presumably the current input line number
static int error_count;                  // presumably the number of errors reported so far
static int text_mode = 0;
static int debug_flag = 0;
static int error_need_dictionary = 0;

// Both arrays have one element per N_HASH_DICT slot.
// NOTE(review): presumably indexed by the value returned from HashDictionary() - confirm.
static int hash_counts[N_HASH_DICT];
static char *hash_chains[N_HASH_DICT];
static char letterGroupsDefined[N_LETTER_GROUPS];   // one element per letter group number

// Table of mnemonic names, each paired with a numeric code.
// The final {NULL, -1} entry marks the end of the table.
// NOTE(review): presumably these are the keywords recognised inside rule
// definitions - confirm against the code that searches this table.
// The entry order is significant (see the note on "w_alt"): "w_alt2"/"w_alt3"
// come before "w_alt", and "p_alt2"/"p_alt3" come before "p_alt".
MNEM_TAB mnem_rules[] = {
	{"unpr",   0x01},
	{"w_alt2", 0x12},
	{"w_alt3", 0x13},
	{"w_alt", 0x11},   // note: put longer names before their sub-strings
	{"p_alt2", 0x22},
	{"p_alt3", 0x23},
	{"p_alt", 0x21},
	{NULL, -1} };   // end-of-table marker

// Table of dictionary flag mnemonics ("$name"), each paired with a numeric code.
// The entries are grouped by how the code is interpreted (see the comment that
// heads each group).  The final {NULL, -1} entry marks the end of the table.
MNEM_TAB mnem_flags[] = {
	// these in the first group put a value in bits0-3 of dictionary_flags
	{"$1", 0x41},           // stress on 1st syllable
	{"$2", 0x42},           // stress on 2nd syllable
	{"$3", 0x43},
	{"$4", 0x44},
	{"$5", 0x45},
	{"$6", 0x46},
	{"$7", 0x47},
	{"$u", 0x48},           // reduce to unstressed
	{"$u1", 0x49},
	{"$u2", 0x4a},
	{"$u3", 0x4b},
	{"$u+",  0x4c},           // reduce to unstressed, but stress at end of clause
	{"$u1+", 0x4d},
	{"$u2+", 0x4e},
	{"$u3+", 0x4f},


	// these set the corresponding numbered bit in dictionary_flags
	{"$pause",     8},    /* ensure pause before this word */
	{"$only",      9},    /* only match on this word without suffix */
	{"$onlys",     10},    /* only match with none, or with 's' suffix */
	{"$strend",    11},    /* full stress if at end of clause */
	{"$strend2",   12},    /* full stress if at end of clause, or only followed by unstressed */
	{"$unstressend",13},   /* reduce stress at end of clause */
	{"$atend",     14},    /* use this pronunciation if at end of clause */
	{"$atstart",   15},   // use this pronunciation at start of clause

	{"$abbrev",    17},    /* use this pronunciation rather than split into letters */
	{"$stem",      18},   // must have a suffix

// language specific
	{"$double",    19},   // IT double the initial consonant of next word
	{"$alt",       20},   // use alternative pronunciation
	{"$alt1",      20},   // synonym for $alt (same value)
	{"$alt2",      21},
	{"$alt3",      22},
	{"$combine",   23},   // Combine with the next word

	{"$dot",       24},   // ignore '.' after this word (abbreviation)
	{"$hasdot",    25},   // use this pronunciation if there is a dot after the word

	{"$max3",      27},   // limit to 3 repetitions
	{"$brk",       28},   // a shorter $pause
	{"$text",      29},   // word translates to replacement text, not phonemes

// flags in dictionary word 2
	{"$verbf",   0x20},    /* verb follows */
	{"$verbsf",  0x21},    /* verb follows, allow -s suffix */
	{"$nounf",   0x22},    /* noun follows */
	{"$pastf",   0x23},   /* past tense follows */
	{"$verb",    0x24},   /* use this pronunciation when it's a verb */
	{"$noun",    0x25},   /* use this pronunciation when it's a noun */
	{"$past",    0x26},   /* use this pronunciation when it's past tense */
	{"$verbextend",0x28},   /* extend influence of 'verb follows' */
	{"$capital", 0x29},   /* use this pronunciation if initial letter is upper case */
	{"$allcaps", 0x2a},   /* presumably: use this pronunciation if all letters are upper case. NOTE(review): the previous comment duplicated $capital's - confirm */
	{"$accent",  0x2b},   // character name is base-character name + accent name
	{"$sentence",0x2d},   // only if this clause is a sentence (i.e. terminator is {. ? !} not {, ; :})

	// doesn't set dictionary_flags
	{"$?",        100},   // conditional rule, followed by byte giving the condition number

	{"$textmode",  200},
	{"$phonememode", 201},
	{NULL,   -1}          // end-of-table marker
};


#define LEN_GROUP_NAME  12     // longest rule-group name, excluding the terminating zero

// Describes one compiled rule group which has been written to the temporary rules file.
typedef struct {
	char name[LEN_GROUP_NAME+1];   // group name (zero terminated)
	unsigned int start;            // ftell() position in the temporary file where the group begins
	unsigned int length;           // number of bytes written for the group
	int group3_ix;                 // copy of the file-level group3_ix taken when the group was written
} RGROUP;


int isspace2(unsigned int c)
{//=========================
// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
//
// Returns 1 for the character codes 1 to ' ' (0x20) inclusive, otherwise 0.
// A value whose low byte is zero (including the string terminator) is not a space.
	unsigned int low_byte = c & 0xff;

	return((low_byte != 0) && (c <= ' '));
}



static FILE *fopen_log(const char *fname,const char *access)
{//==================================================
// Wrapper round fopen().  If the file can't be opened, a message is written
// to f_log (when a log stream is set).  Returns whatever fopen() returned,
// so NULL on failure.
	FILE *stream = fopen(fname,access);

	if((stream == NULL) && (f_log != NULL))
	{
		fprintf(f_log,"Can't access (%s) file '%s'\n",access,fname);
	}
	return(stream);
}


const char *LookupMnemName(MNEM_TAB *table, const int value)
//==========================================================
/* Reverse lookup: find the first entry of 'table' with the given value and
   return its mnemonic name.  'table' must end with an entry whose name is NULL.
   Returns an empty string if no entry has that value. */
{
   MNEM_TAB *entry;

   for(entry = table; entry->mnem != NULL; entry++)
   {
      if(entry->value == value)
         return(entry->mnem);
   }
   return("");   /* not found */
}   /* end of LookupMnemName */


char *print_dictionary_flags(unsigned int *flags)
{//==============================================
// Format the two dictionary flag words for diagnostic output, as
//    <stress keyword>  0x<flags[0] in hex>/<flags[1] in hex>
// where the keyword is the mnem_flags name for bits 0-3 of flags[0] (empty if none).
//
// flags:  points to at least two flag words.
// Returns a pointer to a static buffer, which is overwritten by the next call.

	// Longest result with 32 bit flag words is "$u1+  0xffffffff/ffffffff", which
	// needs 26 bytes including the terminator (42 bytes if unsigned int were 64 bits).
	// The previous 20 byte buffer was overrun when several high flag bits were set.
	static char buf[64];

	sprintf(buf,"%s  0x%x/%x",LookupMnemName(mnem_flags,(flags[0] & 0xf)+0x40), flags[0], flags[1]);
	return(buf);
}




char *DecodeRule(const char *group_chars, int group_length, char *rule, int control)
{//=================================================================================
/* Convert compiled match template to ascii */
//
// group_chars, group_length:  characters which are placed at the start of the match string
// rule:     the compiled rule bytes; decoding stops at a zero byte or RULE_PHONEMES
// control:  if FLAG_UNPRON_TEST is set, a RULE_DOLLAR value of 0x01 ("unpr") is also shown
//
// Returns a pointer to a static buffer (overwritten by the next call) containing
//    [line number:] [?condition] [pre-context) ] match [ (post-context]
// padded with spaces to at least 8 characters.
// NOTE(review): none of the fixed-size buffers below is bounds-checked while decoding.

   unsigned char rb;
	unsigned char c;
	char *p;
   int  ix;
	int  match_type;
	int  finished=0;
	int  value;
	int  linenum=0;        // local: hides the file-level 'linenum'
	int  flags;
	int  suffix_char;
	int  condition_num=0;
	const char *name;
   char buf[60];          // match string, followed by " (" and the post-context
   char buf_pre[60];      // pre-context, in the (reversed) order in which it is stored in the rule
	char suffix[20];
	static char output[60];

	// printable character for each rule code up to RULE_LAST_RULE, indexed by the code
	static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',
			'@','&','%','+','#','S','D','Z','A','L','!',' ','?','?','J','N','K','V','?','T','X','?','W'};

	// printable letter for the byte which follows RULE_LETTERGP (the byte is stored with 'A' added)
	static char symbols_lg[] = {'A','B','C','H','F','G','Y'};

	match_type = 0;
   buf_pre[0] = 0;

	for(ix=0; ix<group_length; ix++)
	{
		buf[ix] = group_chars[ix];
	}
	buf[ix] = 0;

	p = &buf[strlen(buf)];    // decoded characters are appended at p
   while(!finished)
   {
		rb = *rule++;

		if(rb <= RULE_LINENUM)
		{
			// control codes, which don't produce a character themselves
			switch(rb)
			{
			case 0:
			case RULE_PHONEMES:
				finished=1;
				break;
			case RULE_PRE:
				// following characters belong to the pre-context
				match_type = RULE_PRE;
				*p = 0;
				p = buf_pre;
				break;
			case RULE_POST:
				// following characters belong to the post-context, shown after " ("
				match_type = RULE_POST;
				*p = 0;
				strcat(buf," (");
				p = &buf[strlen(buf)];
				break;
			case RULE_PH_COMMON:
				break;
			case RULE_CONDITION:
				/* conditional rule, next byte gives condition number */
				condition_num = *rule++;
				break;
			case RULE_LINENUM:
				// two bytes, each stored with 1 added: (line % 255) then (line / 255)
				value = (rule[1] & 0xff) - 1;
				linenum = (rule[0] & 0xff) - 1 + (value * 255);
				rule+=2;
				break;
			}
			continue;
		}
		
		if(rb == RULE_DOLLAR)
		{
			// next byte is a value from mnem_rules
			value = *rule++ & 0xff;
			if((value != 0x01) || (control & FLAG_UNPRON_TEST))
			{
				p[0] = '$';
				name = LookupMnemName(mnem_rules, value);
				strcpy(&p[1],name);
				p += (strlen(name)+1);
			}
			c = ' ';
		}
		else
		if(rb == RULE_ENDING)
		{
			// followed by two bytes of flags and one byte giving a number;
			// shown as 'S' or 'P', the number, then one letter for each flag bit which is set
			static const char *flag_chars = "eipvdfq tba ";
			flags = ((rule[0] & 0x7f)<< 8) + (rule[1] & 0x7f);
			suffix_char = 'S';
			if(flags & (SUFX_P >> 8))
				suffix_char = 'P';
			sprintf(suffix,"%c%d",suffix_char,rule[2] & 0x7f);
			rule += 3;
			for(ix=0;ix<9;ix++)
			{
				if(flags & 1)
					sprintf(&suffix[strlen(suffix)],"%c",flag_chars[ix]);
				flags = (flags >> 1);
			}
			strcpy(p,suffix);
			p += strlen(suffix);
			c = ' ';
		}
		else
		if(rb == RULE_LETTERGP)
		{
			c = symbols_lg[*rule++ - 'A'];
		}
		else
		if(rb == RULE_LETTERGP2)
		{
			// next byte is a letter group number + 'A', shown as 'L' and two digits
			value = *rule++ - 'A';
			p[0] = 'L';
			p[1] = (value / 10) + '0';
			c = (value % 10) + '0';

			if(match_type == RULE_PRE)
			{
				// the pre-context is reversed on output, so store the three characters reversed
				p[0] = c;
				c = 'L';
			}
			p+=2;
		}
		else
		if(rb <= RULE_LAST_RULE)
			c = symbols[rb];
		else
		if(rb == RULE_SPACE)
			c = '_';
		else
			c = rb;
		*p++ = c;
	}
	*p = 0;

	// assemble the result
	p = output;
	if(linenum > 0)
	{
		sprintf(p,"%5d:\t",linenum);
		p += 7;     // length of the "%5d:\t" text, for a number of up to 5 digits
	}
	if(condition_num > 0)
	{
		sprintf(p,"?%d ",condition_num);
		p = &p[strlen(p)];
	}
	if((ix = strlen(buf_pre)) > 0)
	{
		// copy the pre-context backwards, to restore reading order
		while(--ix >= 0)
			*p++ = buf_pre[ix];
		*p++ = ')';
		*p++ = ' ';
	}
	*p = 0;
	strcat(p,buf);
	ix = strlen(output);
	while(ix < 8)
		output[ix++]=' ';
	output[ix]=0;
   return(output);
}   /* end of DecodeRule */




static int compile_line(char *linebuf, char *dict_line, int *hash)
{//===============================================================
// Compile a line in the language_list file
//
// linebuf:    one line of text.  It is modified: the word and the phoneme string
//             are zero-terminated in place and the word is converted to lower case.
//             The line may end with '\n' or with the string terminator.
// dict_line:  receives the compiled entry:
//                [0]  length of the whole entry
//                [1]  length of the word (bit 7 set if there are no phonemes)
//                the word, then the phonemes and their terminating zero,
//                then the flag codes, then any multiple-word data.
//             NOTE(review): the size of dict_line is not passed in, so nothing
//             here can check that the entry fits; the caller's buffer must be big enough.
// hash:       receives HashDictionary() of the word.
//
// Returns the length of the entry, or 0 if the line doesn't contain a word.
// Errors are written to f_log and counted in error_count.
	unsigned char  c;
	char *p;
	char *word;
	char *phonetic;
	unsigned int  ix;
	int  step;
	int  flagnum;
	unsigned int  n_flag_codes = 0;
	int  flag_offset;
	int  length;
	int  multiple_words = 0;
	int  multiple_numeric_hyphen = 0;
	char *multiple_string = NULL;
	char *multiple_string_end = NULL;
	
	int len_word;
	int len_phonetic;
	int text_not_phonemes;   // this word specifies replacement text, not phonemes
	unsigned int  wc;
	int all_upper_case;
	
	char *mnemptr;
	char *comment;
	unsigned char flag_codes[100];
	char encoded_ph[200];
	unsigned char bad_phoneme[4];
	static char nullstring[] = {0};

	comment = NULL;
	text_not_phonemes = 0;
	phonetic = word = nullstring;

	p = linebuf;

	// step:  0 = before the word, 1 = in the word, 2 = in the rest of a "(multiple word)" entry,
	//        3 = before the phonemes, 4 = in the phonemes, 5 = after the phonemes
	step = 0;
	
	c = 0;
	while(c != '\n')
	{
		c = *p;
	
		if((c == '?') && (step==0))
		{
			// conditional rule, allow only if the numbered condition is set for the voice
			flag_offset = 100;

			p++;
			if(*p == '!')
			{
				// allow only if the numbered condition is NOT set
				flag_offset = 132;
				p++;
			}

			// up to two digits.  The cast keeps the <ctype.h> argument in the valid range
			ix = 0;
			if(isdigit((unsigned char)*p))
			{
				ix += (*p-'0');
				p++;
			}
			if(isdigit((unsigned char)*p))
			{
				ix = ix*10 + (*p-'0');
				p++;
			}
			flag_codes[n_flag_codes++] = ix + flag_offset;
			c = *p;
		}
		
		if((c == '$') && isalnum((unsigned char)p[1]))
		{
			/* read keyword parameter */
			mnemptr = p;
			// isspace2(0) is false, so the end of the string must be tested explicitly
			while(((c = *p) != 0) && !isspace2(c)) p++;
			*p = 0;
	
			// Keep the result signed.  A negative "not found" value would otherwise
			// pass the test below and be stored as a flag code instead of being reported.
			flagnum = LookupMnem(mnem_flags,mnemptr);
			if(flagnum > 0)
			{
				if(flagnum == 200)
				{
					text_mode = 1;
				}
				else
				if(flagnum == 201)
				{
					text_mode = 0;
				}
				else
				if(flagnum == BITNUM_FLAG_TEXTMODE)
				{
					text_not_phonemes = 1;
				}
				else
				{
					flag_codes[n_flag_codes++] = flagnum;
				}
			}
			else
			{
				fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr);
				error_count++;
			}
		}
	
		if(c == 0)
		{
			// The line has no '\n' (last line of the file, or a line which was too long
			// for the caller's buffer).  Treat the terminator as the end of the line,
			// rather than continuing to read beyond the end of the string.
			c = '\n';
		}

		if((c == '/') && (p[1] == '/') && (multiple_words==0))
		{
			c = '\n';   /* "//" treat comment as end of line */
			comment = p;
		}
	
		switch(step)
		{
		case 0:
			if(c == '(')
			{
				multiple_words = 1;
				word = p+1;
				step = 1;
			}
			else
			if(!isspace2(c))
			{
				word = p;
				step = 1;
			}
			break;
	
		case 1:
			if((c == '-') && multiple_words)
			{
				if(isdigit((unsigned char)word[0]))
				{
					multiple_numeric_hyphen = 1;
				}
				else
				{
					flag_codes[n_flag_codes++] = BITNUM_FLAG_HYPHENATED;
				}
				c = ' ';
			}
			if(isspace2(c))
			{
				p[0] = 0;   /* terminate english word */

				if(multiple_words)
				{
					multiple_string = multiple_string_end = p+1;
					step = 2;
				}
				else
				{
					step = 3;
				}
			}
			else
			if((c == ')') && multiple_words)
			{
				p[0] = 0;
				step = 3;
				multiple_words = 0;
			}
			break;

		case 2:
			if(isspace2(c))
			{
				multiple_words++;
			}
			else
			if(c == ')')
			{
				p[0] = ' ';   // terminate extra string
				multiple_string_end = p+1;
				step = 3;
			}
			break;
	
		case 3:
			if(!isspace2(c))
			{
				phonetic = p;
				step = 4;
			}
			break;
	
		case 4:
			if(isspace2(c))
			{
				p[0] = 0;   /* terminate phonetic */
				step = 5;
			}
			break;
	
		case 5:
			break;
		}
		p++;
	}
	
	if(word[0] == 0)
	{
#ifdef OPT_FORMAT
		if(comment != NULL)
			fprintf(f_log,"%s",comment);
		else
			fputc('\n',f_log);
#endif
		return(0);   /* blank line */
	}

	if(text_mode)
		text_not_phonemes = 1;

	if(text_not_phonemes)
	{
		if(word[0] == '_')
		{
			// This is a special word, used by eSpeak.  Translate this into phonemes now
			// NOTE(review): this appends to the string in place, so it relies on there
			// being a spare byte after the phoneme string's terminator.
			strcat(phonetic, " ");     // need a space to indicate word-boundary

	// PROBLEM  vowel reductions are not applied to the translated phonemes
	// condition rules are not applied
			TranslateWord(translator,phonetic,0,NULL);
			text_not_phonemes = 0;
			strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4);

			if((word_phonemes[0] == 0) && (error_need_dictionary < 3))
			{
				// the dictionary was not loaded, we need a second attempt
				error_need_dictionary++;
				fprintf(f_log,"%5d: Need to compile dictionary again\n",linenum);
			}
		}
		else
		{
			// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word
			strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4);
		}
	}
	else
	{
		EncodePhonemes(phonetic,encoded_ph,bad_phoneme);
		if(strchr(encoded_ph,phonSWITCH) != 0)
		{
			flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S;  // don't match on suffixes (except 's') when switching languages
		}

		// check for errors in the phonemes codes
		for(ix=0; ix<sizeof(encoded_ph); ix++)
		{
			c = encoded_ph[ix];
			if(c == 0)   break;
		
			if(c == 255)
			{
				/* unrecognised phoneme, report error */
				fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s  %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic);
				error_count++;
			}
		}
	}

	// the flag records a difference from the language's default mode
	if(text_not_phonemes != translator->langopts.textmode)
	{
		flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE;
	}


	if(sscanf(word,"U+%x",&wc) == 1)
	{
		// Character code
		ix = utf8_out(wc, word);
		word[ix] = 0;
	}
	else
	if(word[0] != '_')
	{
		// convert to lower case, and note if the word is all-capitals
		int c2;

		all_upper_case = 1;
		for(p=word;;)
		{
			// this assumes that the lower case char is the same length as the upper case char
			// OK, except for Turkish "I", but use towlower() rather than towlower2()
			ix = utf8_in(&c2,p);
			if(c2 == 0)
				break;
			if(iswupper(c2))
			{
				utf8_out(towlower(c2),p);
			}
			else
			{
				all_upper_case = 0;
			}
			p += ix;
		}
		if(all_upper_case)
		{
			flag_codes[n_flag_codes++] = BITNUM_FLAG_ALLCAPS;
		}
	}

	len_word = strlen(word);

	if(translator->transpose_min > 0)
	{
		len_word = TransposeAlphabet(translator, word);
	}

	*hash = HashDictionary(word);
	len_phonetic = strlen(encoded_ph);
	
	dict_line[1] = len_word;   // bit 6 indicates whether the word has been compressed
	len_word &= 0x3f;

	memcpy(&dict_line[2],word,len_word);

	if(len_phonetic == 0)
	{
		// no phonemes specified. set bit 7
		dict_line[1] |= 0x80;
		length = len_word + 2;
	}
	else
	{
		// the length includes the terminating zero of the phoneme string
		length = len_word + len_phonetic + 3;
		strcpy(&dict_line[(len_word)+2],encoded_ph);
	}
	
	for(ix=0; ix<n_flag_codes; ix++)
	{
		dict_line[ix+length] = flag_codes[ix];
	}
	length += n_flag_codes;

	if((multiple_string != NULL) && (multiple_words > 0))
	{
		if(multiple_words > 10)
		{
			fprintf(f_log,"%5d: Two many parts in a multi-word entry: %d\n",linenum,multiple_words);
		}
		else
		{
			// a code giving the number of words, then the text of the following words
			dict_line[length++] = 80 + multiple_words;
			ix = multiple_string_end - multiple_string;
			if(multiple_numeric_hyphen)
			{
				dict_line[length++] = ' ';
			}
			memcpy(&dict_line[length],multiple_string,ix);
			length += ix;
		}
	}
	dict_line[0] = length;

#ifdef OPT_FORMAT
	spaces = 16;
	for(ix=0; ix<n_flag_codes; ix++)
	{
		if(flag_codes[ix] >= 100)
		{
			fprintf(f_log,"?%d ",flag_codes[ix]-100);
			spaces -= 3;
		}
	}

	fprintf(f_log,"%s",word);
	spaces -= strlen(word);
	DecodePhonemes(encoded_ph,decoded_ph);
	while(spaces-- > 0) fputc(' ',f_log);
	spaces += (14 - strlen(decoded_ph));
	
	fprintf(f_log," %s",decoded_ph);
	while(spaces-- > 0) fputc(' ',f_log);
	for(ix=0; ix<n_flag_codes; ix++)
	{
		if(flag_codes[ix] < 100)
			fprintf(f_log," %s",lookup_mnem(mnem_flags,flag_codes[ix]));
	}
	if(comment != NULL)
		fprintf(f_log," %s",comment);
	else
		fputc('\n',f_log);
#endif

	return(length);
}  /* end of compile_line */



static void compile_dictlist_start(void)
{//=====================================
// Empty the dictionary list: free every entry in every hash chain,
// and set all the chain heads and counts back to zero.
	int hash;
	char *entry;
	char *next_entry;

	for(hash = 0; hash < N_HASH_DICT; hash++)
	{
		for(entry = hash_chains[hash]; entry != NULL; entry = next_entry)
		{
			// each entry starts with the pointer to the next one; read it before freeing
			memcpy(&next_entry, entry, sizeof(char *));
			free(entry);
		}
		hash_chains[hash] = NULL;
		hash_counts[hash] = 0;
	}
}


static void compile_dictlist_end(FILE *f_out)
{//==========================================
// Write out the compiled dictionary list
//
// For each hash value in turn, writes all the entries in its chain followed
// by a zero byte.  hash_counts[] is overwritten with the position in f_out
// at which each hash value's entries start.
	int hash;
	int length;
	char *p;

	if(f_log != NULL)
	{
#ifdef OUTPUT_FORMAT
		for(hash=0; hash<N_HASH_DICT; hash++)
		{
			fprintf(f_log,"%8d",hash_counts[hash]);
			if((hash & 7) == 7)
				fputc('\n',f_log);
		}
		fflush(f_log);
#endif
	}
	
	for(hash=0; hash<N_HASH_DICT; hash++)
	{
		p = hash_chains[hash];
		hash_counts[hash] = (int)ftell(f_out);
	
		while(p != NULL)
		{
			// The entry follows the link pointer, and its first byte is its length.
			// Read it as unsigned: where plain char is signed, a length above 127
			// would otherwise become negative and then a huge size for fwrite().
			length = *(unsigned char *)(p+sizeof(char *));
			fwrite(p+sizeof(char *),length,1,f_out);
			memcpy(&p,p,sizeof(char *));    // follow the link to the next entry
		}
		fputc(0,f_out);
	}
}



static int compile_dictlist_file(const char *path, const char* filename)
{//=====================================================================
// Compile one dictionary list file, adding its entries to the hash chains.
//
// path, filename:  joined to give the file name; "<filename>.txt" is tried first,
//                  then <filename> without the extension.
// Returns -1 if neither file can be opened, otherwise 0.  Problems found in
// the file are written to f_log and counted in error_count.
	int  length;
	int  hash;
	char *p;
	int  count=0;
	FILE *f_in;
	char buf[200];
	char fname[sizeof(path_home)+45];

	// An entry may occupy up to 128 bytes, but compile_line() isn't told the size of
	// its output buffer and can produce more than that from a long input line (for
	// example it copies up to 200 bytes of encoded phonemes).  So give it a buffer
	// which is larger than anything it can build from a line of sizeof(buf), and
	// reject over-long entries afterwards, rather than letting it overrun the stack.
	const int max_entry_length = 128;
	char dict_line[1024];
	
	text_mode = 0;

	// try with and without '.txt' extension
	sprintf(fname,"%s%s.txt",path,filename);
	if((f_in = fopen(fname,"r")) == NULL)
	{
		sprintf(fname,"%s%s",path,filename);
		if((f_in = fopen(fname,"r")) == NULL)
			return(-1);
	}

	fprintf(f_log,"Compiling: '%s'\n",fname);

	linenum=0;
	
	while(fgets(buf,sizeof(buf),f_in) != NULL)
	{
		linenum++;

		length = compile_line(buf,dict_line,&hash);
		if(length == 0)  continue;   /* blank line */

		if(length > max_entry_length)
		{
			fprintf(f_log,"%5d: Entry is too long (%d bytes, maximum %d)\n",linenum,length,max_entry_length);
			error_count++;
			continue;
		}

		hash_counts[hash]++;
	
		// each list item is a pointer to the next item, followed by the compiled entry
		p = (char *)malloc(length+sizeof(char *));
		if(p == NULL)
		{
			if(f_log != NULL)
			{
				fprintf(f_log,"Can't allocate memory\n");
				error_count++;
			}
			break;
		}
	
		// add the new item at the head of the chain for this hash value
		memcpy(p,&hash_chains[hash],sizeof(char *));
		hash_chains[hash] = p;
		memcpy(p+sizeof(char *),dict_line,length);
		count++;
	}
	
	fprintf(f_log,"\t%d entries\n",count);
	fclose(f_in);
	return(0);
}   /* end of compile_dictlist_file */



// Work areas for the rule which is being compiled.  compile_rule() clears them,
// copy_rule_string() fills them in, then compile_rule() assembles the rule from them.
static char rule_cond[80];       // text after '?': the condition number, which may be preceded by '!'
static char rule_pre[80];        // compiled pre-context (the part before ')')
static char rule_post[80];       // compiled post-context (the part after '(')
static char rule_match[80];      // the characters to be matched
static char rule_phonemes[80];   // phoneme mnemonics; successive strings are joined with a space
static char group_name[LEN_GROUP_NAME+1];   // name of the rule group which is being compiled
static int group3_ix;            // copied into RGROUP.group3_ix when a group is written out

#define N_RULES 2000		// max rules for each group



static void copy_rule_string(char *string, int &state)
{//===================================================
// state 0: conditional, 1=pre, 2=match, 3=post, 4=phonemes
//
// Store one part of a rule line in the work area for the current state, then
// move 'state' on to the state which is expected for the next part.
// In the pre (1) and post (3) states the special characters of the rule syntax
// are replaced by their RULE_xxx codes; in the other states the text is copied
// as it is, apart from '\' escapes.  An empty string does nothing and leaves
// 'state' unchanged.
// NOTE(review): the 80 byte work areas are written without a length check.
	static char *outbuf[5] = {rule_cond, rule_pre, rule_match, rule_post, rule_phonemes};
	static int next_state[5] = {2,2,4,4,4};
	char *output;
	char *p;
	int ix;
	int len;
	char c;
	int  sxflags;
	int  value;
	int  literal;
	MNEM_TAB *mr;

	if(string[0] == 0) return;

	output = outbuf[state];
	if(state==4)
	{
		// append to any previous phoneme string, i.e. allow spaces in the phoneme string
		len = strlen(rule_phonemes);
		if(len > 0)
			rule_phonemes[len++] = ' ';
		output = &rule_phonemes[len];
	}
	sxflags = 0x808000;           // to ensure non-zero bytes
	
	for(p=string,ix=0;;)
	{
		literal = 0;
		c = *p++;
		if(c == '\\')
		{
			c = *p++;   // treat next character literally
			if((c >= '0') && (c <= '3') && (p[0] >= '0') && (p[0] <= '7') && (p[1] >= '0') && (p[1] <= '7'))
			{
				// character code given by 3 digit octal value;
				c = (c-'0')*64 + (p[0]-'0')*8 + (p[1]-'0');
				p += 2;
			}
			literal = 1;
		}

		if((state==1) || (state==3))
		{
			// replace special characters (note: 'E' is reserved for a replaced silent 'e')
			if(literal == 0)
			{
				// letter group code for each of the letters 'A' to 'I', indexed by (letter - 'A')
				static const char lettergp_letters[9] = {LETTERGP_A,LETTERGP_B,LETTERGP_C,0,0,LETTERGP_F,LETTERGP_G,LETTERGP_H,LETTERGP_Y};
				switch(c)
				{
				case '_':
					c = RULE_SPACE;
					break;

				case 'Y':
					// 'I' is index 8 of lettergp_letters, which holds LETTERGP_Y
					c = 'I';   // drop through to next case
				case 'A':   // vowel
				case 'B':
				case 'C':
				case 'H':
				case 'F':
				case 'G':
					// Two bytes are stored.  The pre string is written out in reverse by
					// compile_rule(), so there the two bytes are stored in the opposite order.
					if(state == 1)
					{
						// pre-rule, put the number before the RULE_LETTERGP;
						output[ix++] = lettergp_letters[c-'A'] + 'A';
						c = RULE_LETTERGP;
					}
					else
					{
						output[ix++] = RULE_LETTERGP;
						c = lettergp_letters[c-'A'] + 'A';
					}
					break;
				case 'D':
					c = RULE_DIGIT;
					break;
				case 'K':
					c = RULE_NOTVOWEL;
					break;
				case 'N':
					c = RULE_NO_SUFFIX;
					break;
				case 'V':
					c = RULE_IFVERB;
					break;
				case 'Z':
					c = RULE_NONALPHA;
					break;
				case '+':
					c = RULE_INC_SCORE;
					break;
				case '@':
					c = RULE_SYLLABLE;
					break;
				case '&':
					c = RULE_STRESSED;
					break;
				case '%':
					c = RULE_DOUBLE;
					break;
				case '#':
					c = RULE_DEL_FWD;
					break;
				case '!':
					c = RULE_CAPITAL;
					break;
				case 'T':
					// stored as RULE_DOLLAR with the value which mnem_rules gives for "w_alt"
					output[ix++] = RULE_DOLLAR;
					c = 0x11;
					break;
				case 'W':
					c = RULE_SPELLING;
					break;
				case 'X':
					c = RULE_NOVOWELS;
					break;
				case 'J':
					c = RULE_SKIPCHARS;
					break;
				case 'L':
					// expect two digits
					// NOTE(review): the two characters are read without checking for the end
					// of the string, and only the second is tested for being a digit.
					c = *p++ - '0';
					value = *p++ - '0';
					c = c * 10 + value;
					if((value < 0) || (value > 9))
					{
						c = 0;
						fprintf(f_log,"%5d: Expected 2 digits after 'L'\n",linenum);
						error_count++;
					}
					else
					if((c <= 0) || (c >= N_LETTER_GROUPS) || (letterGroupsDefined[(int)c] == 0))
					{
						fprintf(f_log,"%5d: Letter group L%.2d not defined\n",linenum,c);
						error_count++;
					}
					c += 'A';    // the group number is stored with 'A' added
					if(state == 1)
					{
						// pre-rule, put the group number before the RULE_LETTERGP command
						output[ix++] = c;
						c = RULE_LETTERGP2;
					}
					else
					{
						output[ix++] = RULE_LETTERGP2;
					}
					break;

				case '$':
					// a keyword from mnem_rules, stored as RULE_DOLLAR then the keyword's value
					output[ix++] = RULE_DOLLAR;
					c = 0;
					mr = mnem_rules;
					while(mr->mnem != NULL)
					{
						// the first table entry which is a prefix of the text is used
						len = strlen(mr->mnem);
						if(memcmp(p, mr->mnem, len) == 0)
						{
							c = mr->value;
							p += len;
							break;
						}
						mr++;
					}
					if(c == 0)
					{
						fprintf(f_log,"%5d: $ command not recognized\n",linenum);
						error_count++;
					}
					break;

				case 'P':
					sxflags |= SUFX_P;   // Prefix, now drop through to Suffix
				case 'S':
					// stored as RULE_ENDING, two bytes of flags, then the number with bit 7 set
					output[ix++] = RULE_ENDING;
					value = 0;
					// read the flag letters and the digits of the number, up to a space or the end of the string
					while(!isspace2(c = *p++) && (c != 0))
					{
						switch(c)
						{
						case 'e':
							sxflags |= SUFX_E;
							break;
						case 'i':
							sxflags |= SUFX_I;
							break;
						case 'p':	// obsolete, replaced by 'P' above
							sxflags |= SUFX_P;
							break;
						case 'v':
							sxflags |= SUFX_V;
							break;
						case 'd':
							sxflags |= SUFX_D;
							break;
						case 'f':
							sxflags |= SUFX_F;
							break;
						case 'q':
							sxflags |= SUFX_Q;
							break;
						case 't':
							sxflags |= SUFX_T;
							break;
						case 'b':
							sxflags |= SUFX_B;
							break;
						case 'a':
							sxflags |= SUFX_A;
							break;
						default:
							if(isdigit(c))
								value = (value*10) + (c - '0');
							break;
						}
					}
					p--;    // go back to the character which ended the loop
					output[ix++] = sxflags >> 16;
					output[ix++] = sxflags >> 8;
					c = value | 0x80;
					break;
				}
			}
		}
		// store the character (or the last byte of its replacement); a zero ends the string
		output[ix++] = c;
		if(c == 0) break;
	}

	state = next_state[state];
}  //  end of copy_rule_string



static char *compile_rule(char *input)
{//===================================
// Compile one line of a rule group.
//
// input:  the text of the line, ended by a zero, '\n' or '\r'.
//
// Returns a newly allocated (malloc) compiled rule, which the caller owns:
//    the encoded phonemes and their terminating zero, then the match string,
//    then optionally: RULE_LINENUM and 2 bytes (if debug_flag is set),
//    RULE_CONDITION and the condition number, RULE_PRE and the pre-context
//    (reversed), RULE_POST and the post-context; then a terminating zero.
// Returns NULL if the line has no match string, or if memory can't be allocated.
// Errors are written to f_log and counted in error_count.
// NOTE(review): buf and output are fixed size and are filled without length checks.
	int ix;
	unsigned char c;
	int wc;
	char *p;
	char *prule;
	int len;
	int len_name;
	int state=2;
	int finish=0;
	char buf[80];
	char output[150];
	unsigned char bad_phoneme[4];

	buf[0]=0;
	rule_cond[0]=0;
	rule_pre[0]=0;
	rule_post[0]=0;
	rule_match[0]=0;
	rule_phonemes[0]=0;

	p = buf;
	
	// Collect characters in buf until a delimiter is found, then pass the
	// collected string to copy_rule_string() for the current state.
	for(ix=0; finish==0; ix++)
	{
		switch(c = input[ix])
		{
		case ')':		// end of prefix section
			*p = 0;
			state = 1;
			copy_rule_string(buf,state);
			p = buf;
			break;
			
		case '(':		// start of suffix section
			*p = 0;
			state = 2;
			copy_rule_string(buf,state);
			state = 3;
			p = buf;
			if(input[ix+1] == ' ')
			{
				fprintf(f_log,"%5d: Syntax error. Space after (\n",linenum);
				error_count++;
			}
			break;
			
		case '\n':		// end of line
		case '\r':
		case 0:			// end of line
			*p = 0;
			copy_rule_string(buf,state);
			finish=1;
			break;
			
		case '\t':		// end of section section
		case ' ':
			*p = 0;
			copy_rule_string(buf,state);
			p = buf;
			break;
			
		case '?':
			// at the start of the rule this introduces a condition number, otherwise it's an ordinary character
			if(state==2)
				state=0;
			else
				*p++ = c;
			break;

		default:
			*p++ = c;
			break;
		}
	}
	
	if(strcmp(rule_match,"$group")==0)
		strcpy(rule_match,group_name);

	if(rule_match[0]==0)
	{
		if(rule_post[0] != 0)
		{
			fprintf(f_log,"%5d: Syntax error\n",linenum);
			error_count++;
		}
		return(NULL);
	}

	EncodePhonemes(rule_phonemes,buf,bad_phoneme);
	for(ix=0;; ix++)
	{
		if((c = buf[ix])==0) break;
		if(c==255)
		{
			fprintf(f_log,"%5d: Bad phoneme [%c] in %s\n",linenum,bad_phoneme[0],input);
			error_count++;
			break;
		}
	}
	strcpy(output,buf);
	len = strlen(buf)+1;     // keep the zero which ends the phonemes
	
	len_name = strlen(group_name);
	if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0))
	{
		utf8_in(&wc,rule_match);
		if((group_name[0] == '9') && IsDigit(wc))
		{
			// numeric group, rule_match starts with a digit, so OK
		}
		else
		{
			fprintf(f_log,"%5d: Wrong initial letters '%s' for group '%s'\n",linenum,rule_match,group_name);
			error_count++;
		}
	}
	strcpy(&output[len],rule_match);
	len += strlen(rule_match);

	if(debug_flag)
	{
		// the line number as two bytes, each with 1 added so that neither is zero
		output[len] = RULE_LINENUM;
		output[len+1] = (linenum % 255) + 1;
		output[len+2] = (linenum / 255) + 1;
		len+=3;
	}

	if(rule_cond[0] != 0)
	{
		ix = -1;
		if(rule_cond[0] == '!')
		{
			// allow the rule only if the condition number is NOT set for the voice
			ix = atoi(&rule_cond[1]) + 32;
		}
		else
		{
			// allow the rule only if the condition number is set for the voice
			ix = atoi(rule_cond);
		}

		if((ix > 0) && (ix < 255))
		{
			output[len++] = RULE_CONDITION;
			output[len++] = ix;
		}
		else
		{
			fprintf(f_log,"%5d: bad condition number ?%d\n",linenum,ix);
			error_count++;
		}
	}
	if(rule_pre[0] != 0)
	{
		output[len++] = RULE_PRE;
		// output PRE string in reverse order
		for(ix = strlen(rule_pre)-1; ix>=0; ix--)
			output[len++] = rule_pre[ix];
	}

	if(rule_post[0] != 0)
	{
		sprintf(&output[len],"%c%s",RULE_POST,rule_post);
		len += (strlen(rule_post)+1);
	}
	output[len++]=0;

	prule = (char *)malloc(len);
	if(prule == NULL)
	{
		// NULL is already the result for a line which doesn't give a rule, so callers
		// handle it; that is better than copying into a null pointer.
		fprintf(f_log,"Can't allocate memory\n");
		error_count++;
		return(NULL);
	}
	memcpy(prule,output,len);
	return(prule);
}  //  end of compile_rule


int __cdecl string_sorter(char **a, char **b)
{//===========================================
// qsort() comparison for compiled rules.  Each item is a pointer to two
// consecutive zero-terminated strings.  The items are ordered by the first
// string; if those are equal they are ordered by the second string.
	char *first = *a;
	char *second = *b;
	int diff = strcmp(first,second);

	if(diff == 0)
	{
		// step over the first string and its terminator in each item
		diff = strcmp(first + strlen(first) + 1, second + strlen(second) + 1);
	}
	return(diff);
}   /* end of string_sorter */


static int __cdecl rgroup_sorter(RGROUP *a, RGROUP *b)
{//===================================================
// Sort long names before short names
// Names of the same length are put in strcmp() order, and groups with the
// same name are put in order of their 'start' value.
	int diff = strlen(b->name) - strlen(a->name);

	if(diff == 0)
	{
		diff = strcmp(a->name,b->name);
		if(diff == 0)
			diff = a->start-b->start;
	}
	return(diff);
}


#ifdef OUTPUT_FORMAT
// Only compiled if OUTPUT_FORMAT is defined (its #define is commented out at the top of this part of the file).
// Writes a text listing of the compiled rules of one group to f_out: a "$group" line,
// then for each rule the condition, pre-context, match, post-context and phonemes.
// rule_match, rule_pre and rule_post are used as work areas, so their contents are lost.
// NOTE(review): the 'symbols' table and RULE_LETTER7 aren't used by any other code in
// this part of the file, and differ from the table in DecodeRule(); this looks out of
// date - check before enabling it.
static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name)
{//=============================================================================
	int rule;
	int ix;
	unsigned char c;
	int len1;
	int len2;
	int spaces;
	char *p;
	char *pout;
	int condition;
	char buf[80];
	char suffix[12];

	static unsigned char symbols[] = {'@','&','%','+','#','$','D','Z','A','B','C','F'};

	fprintf(f_out,"\n$group %s\n",name);

	for(rule=0; rule<n_rules; rule++)
	{
		// a compiled rule is the phoneme string and its terminator, followed by the match template
		p = rules[rule];
		len1 = strlen(p) + 1;
		p = &p[len1];
		len2 = strlen(p);
		
		rule_match[0]=0;
		rule_pre[0]=0;
		rule_post[0]=0;
		condition = 0;

		// split the template into its match, pre and post parts
		pout = rule_match;
		for(ix=0; ix<len2; ix++)
		{
			switch(c = p[ix])
			{
			case RULE_PRE:
				*pout = 0;
				pout = rule_pre;
				break;
			case RULE_POST:
				*pout = 0;
				pout = rule_post;
				break;
			case RULE_CONDITION:
				condition = p[++ix];
				break;
			case RULE_ENDING:
				sprintf(suffix,"$%d[%x]",(p[ix+2]),p[ix+1] & 0x7f);
				ix += 2;
				strcpy(pout,suffix);
				pout += strlen(suffix);
				break;
			default:
				if(c <= RULE_LETTER7)
					c = symbols[c-RULE_SYLLABLE];
				if(c == ' ')
					c = '_';
				*pout++ = c;
				break;
			}
		}
		*pout = 0;
		
		// the condition and pre-context share a column which is 12 characters wide
		spaces = 12;
		if(condition > 0)
		{
			sprintf(buf,"?%d ",condition);
			spaces -= strlen(buf);
			fprintf(f_out,"%s",buf);
		}

		if(rule_pre[0] != 0)
		{
			// the pre-context is held in reverse order
			p = buf;
			for(ix=strlen(rule_pre)-1;ix>=0;ix--)
				*p++ = rule_pre[ix];
			sprintf(p,") ");
			spaces -= strlen(buf);
			for(ix=0; ix<spaces; ix++)
			   fputc(' ',f_out);
			fprintf(f_out,"%s",buf);
			spaces = 0;
		}
		
		for(ix=0; ix<spaces; ix++)
			fputc(' ',f_out);
		
		// the match and post-context, padded to 14 characters
		spaces = 14;
		sprintf(buf," %s ",rule_match);
		if(rule_post[0] != 0)
		{
			p = &buf[strlen(buf)];
			sprintf(p,"(%s ",rule_post);
		}
		fprintf(f_out,"%s",buf);
		spaces -= strlen(buf);

		for(ix=0; ix<spaces; ix++)
			fputc(' ',f_out);
		DecodePhonemes(rules[rule],buf);
		fprintf(f_out,"%s\n",buf);   // phonemes
	}
}
#endif


//#define LIST_GROUP_INFO
static void output_rule_group(FILE *f_out, int n_rules, char **rules, char *name)
{//==============================================================================
// Write the compiled rules of one group to f_out.
//
// rules:  n_rules compiled rules, each being a phoneme string and its terminating
//         zero, followed by the match string.  The array is sorted here.
// name:   the group name.  That many characters are left off the start of each
//         match string, except for the group named "9".
//
// A rule is written as its match string, RULE_PHONEMES, then the phoneme string
// and its zero.  If the next rule has the same phonemes, RULE_PH_COMMON is
// written first and the phonemes are remembered; a later rule which has the
// remembered phonemes is written as just its match string and a zero.
	int ix;
	int phonemes_size;
	int match_length;
	int skip;
	char *phonemes;
	char *match;
	const char *shared_phonemes = "";
	short nextchar_count[256];

	memset(nextchar_count,0,sizeof(nextchar_count));
	skip = strlen(name);

#ifdef OUTPUT_FORMAT
	print_rule_group(f_log,n_rules,rules,name);
#endif

	// put the rules in order of their phoneme strings, so that equal ones are together
	qsort((void *)rules,n_rules,sizeof(char *),(int (__cdecl *)(const void *,const void *))string_sorter);

	if(strcmp(name,"9")==0)
	{
		skip = 0;    //  the numeric group keeps the whole of each match string
	}

	for(ix=0; ix<n_rules; ix++)
	{
		phonemes = rules[ix];
		phonemes_size = strlen(phonemes) + 1;      // including the terminator
		match = phonemes + phonemes_size + skip;   // match string without the group name
		match_length = strlen(match);

		nextchar_count[(unsigned char)(match[0])]++;   // count the byte which follows the group name

		if((shared_phonemes[0] != 0) && (strcmp(phonemes,shared_phonemes)==0))
		{
			// same phonemes as the remembered rule, so they aren't written again
			fwrite(match,match_length,1,f_out);
			fputc(0,f_out);
			continue;
		}

		if((ix < n_rules-1) && (strcmp(phonemes,rules[ix+1])==0))
		{
			// the next rule has the same phonemes: remember these ones
			shared_phonemes = rules[ix];
			fputc(RULE_PH_COMMON,f_out);
		}

		fwrite(match,match_length,1,f_out);
		fputc(RULE_PHONEMES,f_out);
		fwrite(phonemes,phonemes_size,1,f_out);
	}

#ifdef LIST_GROUP_INFO
	for(ix=32; ix<256; ix++)
	{
		if(nextchar_count[ix] > 30)
			printf("Group %s   %c  %d\n",name,ix,nextchar_count[ix]);
	}
#endif
}  //  end of output_rule_group



static int compile_lettergroup(char *input, FILE *f_out)
{//=====================================================
// Compile a letter group definition.
//
// input:  the text which follows ".L": a two digit group number, then the
//         items of the group separated by white space.  The text is modified
//         (each item is zero-terminated in place).
// f_out:  receives RULE_GROUP_START, RULE_LETTERGP2, the group number + 'A',
//         the items (each with its terminating zero, longest items first),
//         then RULE_GROUP_END.
//
// Returns 0 if the group was written, or 1 if the group number is missing or
// out of range (in which case nothing is written).  Errors are written to
// f_log and counted in error_count.
	char *p;
	char *p_start;
	int group;
	int ix;
	int n_items;
	int length;
	int max_length = 0;

	#define N_LETTERGP_ITEMS 200
	char *items[N_LETTERGP_ITEMS];
	int item_length[N_LETTERGP_ITEMS];   // int rather than char, so that the length of a long item can't overflow

	p = input;
	// the casts keep the <ctype.h> arguments in the valid range
	if(!isdigit((unsigned char)p[0]) || !isdigit((unsigned char)p[1]))
	{
		fprintf(f_log,"%5d: Expected 2 digits after '.L'\n",linenum);
		error_count++;
		return(1);
	}

	group = atoi(&p[0]);
	if(group >= N_LETTER_GROUPS)
	{
		fprintf(f_log,"%5d: lettergroup out of range (01-%.2d)\n",linenum,N_LETTER_GROUPS-1);
		error_count++;
		return(1);
	}

	// Skip the group number.  isspace2(0) is false, so the end of the string
	// has to be tested explicitly.
	while((*p != 0) && !isspace2(*p)) p++;

	fputc(RULE_GROUP_START,f_out);
	fputc(RULE_LETTERGP2,f_out);
	fputc(group + 'A', f_out);
	if(letterGroupsDefined[group] != 0)
	{
		fprintf(f_log,"%5d: lettergroup L%.2d is already defined\n",linenum,group);
		error_count++;
	}
	letterGroupsDefined[group] = 1;

	n_items = 0;
	while(n_items < N_LETTERGP_ITEMS)
	{
		while(isspace2(*p)) p++;
		if(*p == 0)
			break;

		items[n_items] = p_start = p;
		while((*p & 0xff) > ' ')
		{
			p++;
		}
		length = (p - p_start) + 1;   // the length includes the terminating zero
		if(*p != 0)
		{
			*p++ = 0;   // terminate the item, and continue after it
		}
		// Otherwise the item ends at the end of the string (e.g. the line has no '\n',
		// or a comment directly after the item has been cut off).  Don't step over the
		// terminator, or whatever follows it in the buffer would be read as more items.

		if(length > max_length)
			max_length = length;
		item_length[n_items++] = length;
	}

	// write out the items, longest first
	while(max_length > 1)
	{
		for(ix=0; ix < n_items; ix++)
		{
			if(item_length[ix] == max_length)
			{
				fwrite(items[ix],1,max_length,f_out);
			}
		}
		max_length--;
	}

	fputc(RULE_GROUP_END,f_out);

	return(0);
}


static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{//====================================================================
// Compile a *_rules source file into the rules section of a dictionary file.
//   f_in:       the rules source text, read line by line
//   f_out:      the dictionary output stream
//   fname_temp: name of a temporary file; the compiled rule groups are written
//               there first, then sorted and copied to f_out. It is removed
//               before returning successfully.
// Lines starting with '.' switch section: ".L" (letter group), ".replace"
// (character replacements, compile_mode 2) and ".group" (rule group,
// compile_mode 1). Text following "//" on a line is ignored.
// Returns 0 on success, 1 if the temporary file cannot be created,
// 2 if it cannot be re-opened for reading.
	char *prule;
	unsigned char *p;
	int ix;
	int c;
	int gp;
	FILE *f_temp;
	int n_rules=0;
	int count=0;
	int wc;
	const char *prev_rgroup_name;
	unsigned int char_code;
	int compile_mode=0;
	char *buf;
	char buf1[200];
	char *rules[N_RULES];

	int n_rgroups = 0;
	int n_groups3 = 0;
	RGROUP rgroup[N_RULE_GROUP2];
	
	linenum = 0;
	group_name[0] = 0;

	if((f_temp = fopen_log(fname_temp,"wb")) == NULL)
		return(1);

	for(;;)
	{
		linenum++;
		buf = fgets(buf1,sizeof(buf1),f_in);
		if(buf != NULL)
		{
			// strip a trailing comment
			if((p = (unsigned char *)strstr(buf,"//")) != NULL)
				*p = 0;

			if(buf[0] == '\r') buf++;  // ignore extra \r in \r\n 
		}

		if((buf == NULL) || (buf[0] == '.'))
		{
			// next .group or end of file, write out the previous group

			if(n_rules > 0)
			{
				if(n_rgroups >= N_RULE_GROUP2)
				{
					// rgroup[] is full; writing another entry would overflow it
					fprintf(f_log,"%5d: Too many rule groups (max %d)\n",linenum,N_RULE_GROUP2);
					error_count++;
				}
				else
				{
					strcpy(rgroup[n_rgroups].name,group_name);
					rgroup[n_rgroups].group3_ix = group3_ix;
					rgroup[n_rgroups].start = ftell(f_temp);
					output_rule_group(f_temp,n_rules,rules,group_name);
					rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start;
					n_rgroups++;

					count += n_rules;
				}
			}
			n_rules = 0;

			if(compile_mode == 2)
			{
				// end of the character replacements section
				fwrite(&n_rules,1,4,f_out);   // write a zero word to terminate the replacement list
				compile_mode = 0;
			}

			if(buf == NULL) break;   // end of file

			if(memcmp(buf,".L",2)==0)
			{
				compile_lettergroup(&buf[2], f_out);
				continue;
			}

			if(memcmp(buf,".replace",8)==0)
			{
				compile_mode = 2;
				fputc(RULE_GROUP_START,f_out);
				fputc(RULE_REPLACEMENTS,f_out);

				// advance to next word boundary
				while((ftell(f_out) & 3) != 0)
					fputc(0,f_out);
			}

			if(memcmp(buf,".group",6)==0)
			{
				compile_mode = 1;

				// copy the group name (up to LEN_GROUP_NAME bytes) that follows ".group"
				p = (unsigned char *)&buf[6];
				while((p[0]==' ') || (p[0]=='\t')) p++;    // Note: Windows isspace(0xe1) gives TRUE !
				ix = 0;
				while((*p > ' ') && (ix < LEN_GROUP_NAME))
					group_name[ix++] = *p++;
				group_name[ix]=0;
				group3_ix = 0;

				if(sscanf(group_name,"0x%x",&char_code)==1)
				{
					// group character is given as a character code (max 16 bits)
					p = (unsigned char *)group_name;
	
					if(char_code > 0x100)
					{
						*p++ = (char_code >> 8);
					}
					*p++ = char_code;
					*p = 0;
				}
				else
				{
					if(translator->letter_bits_offset > 0)
					{
						// the first character of the name selects a "group 3" index
						// when it lies within 128 codes of letter_bits_offset
						utf8_in(&wc, group_name);
						if(((ix = (wc - translator->letter_bits_offset)) >= 0) && (ix < 128))
						{
							group3_ix = ix+1;   // not zero
						}
					}
				}
	
				if((group3_ix == 0) && (strlen(group_name) > 2))
				{
					if(utf8_in(&c,group_name) < 2)
					{
						fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)\n",linenum);
						error_count++;
					}
	
					group_name[2] = 0;
				}
			}

			continue;
		}
		
		switch(compile_mode)
		{
		case 1:    //  .group
			prule = compile_rule(buf);
			// rules beyond N_RULES in one group are dropped
			if((prule != NULL) && (n_rules < N_RULES))
			{
				rules[n_rules++] = prule;
			}
			break;

		case 2:   //  .replace
			{
				// each line holds two words: the character(s) to replace and
				// the replacement; each word packs up to two characters,
				// 16 bits apiece, into one 32-bit value
				int replace1;
				int replace2;
				char *pr;

				pr = buf;
				replace1 = 0;
				replace2 = 0;
				while(isspace2(*pr)) pr++;
				ix = 0;
				while((unsigned char)(*pr) > 0x20)   // not space or zero-byte
				{
					pr += utf8_in(&c,pr);
					if(ix < 32)   // a shift by 32 or more is undefined; ignore extra characters
						replace1 += (c << ix);
					ix += 16;
				}
				while(isspace2(*pr)) pr++;
				ix = 0;
				while((unsigned char)(*pr) > 0x20)
				{
					pr += utf8_in(&c,pr);
					if(ix < 32)
						replace2 += (c << ix);
					ix += 16;
				}
				if(replace1 != 0)
				{
					Write4Bytes(f_out,replace1);   // write as little-endian
					Write4Bytes(f_out,replace2);   // if big-endian, reverse the bytes in LoadDictionary()
				}
			}
			break;
		}
	}
	fclose(f_temp);

	// order the groups with rgroup_sorter, then copy them from the temporary file in that order
	qsort((void *)rgroup,n_rgroups,sizeof(rgroup[0]),(int (__cdecl *)(const void *,const void *))rgroup_sorter);

	if((f_temp = fopen(fname_temp,"rb"))==NULL)
		return(2);

	prev_rgroup_name = "\n";

	for(gp = 0; gp < n_rgroups; gp++)
	{
		fseek(f_temp,rgroup[gp].start,SEEK_SET);

		if(strcmp(rgroup[gp].name, prev_rgroup_name) != 0)
		{
			// not the same as the previous group
			if(gp > 0)
				fputc(RULE_GROUP_END,f_out);
			fputc(RULE_GROUP_START,f_out);

			if(rgroup[gp].group3_ix != 0)
			{
				n_groups3++;
				fputc(1,f_out);
				fputc(rgroup[gp].group3_ix, f_out);
			}
			else
			{
				fprintf(f_out, "%s", prev_rgroup_name = rgroup[gp].name);
			}
			fputc(0,f_out);
		}

		// copy the compiled rules of this group
		for(ix=rgroup[gp].length; ix>0; ix--)
		{
			c = fgetc(f_temp);
			fputc(c,f_out);
		}
	}
	fputc(RULE_GROUP_END,f_out);
	fputc(0,f_out);

	fclose(f_temp);
	remove(fname_temp);

	fprintf(f_log,"\t%d rules, %d groups (%d)\n\n",count,n_rgroups,n_groups3);
	return(0);
}  //  end of compile_dictrules



int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, char *fname_err, int flags)
{//=====================================================================================================
// Compile the dictionary source files for 'dict_name' into "<path_home>/<dict_name>_dict"
// and then load the result into the current translator.
// dsource:   directory prefix of the source files (NULL is treated as "")
// dict_name: dictionary name; sources are "<dsource><dict_name>_rules[.txt]", "..._list", etc.
// log:       stream for messages (stderr is used if NULL)
// fname_err: space to write the filename in case of error (may be NULL)
// flags: bit 0:  include source line number information, for debug purposes.
// Returns the number of errors found, or -1 if the rules source or the
// output file could not be opened (fname_err then names the file that failed).

	FILE *f_in;
	FILE *f_out;
	int offset_rules=0;
	int value;
	char fname_in[sizeof(path_home)+45];
	char fname_out[sizeof(path_home)+15];
	char fname_temp[sizeof(path_home)+15];
	char path[sizeof(path_home)+40];       // path_dsource+20

	error_count = 0;
	error_need_dictionary = 0;
	memset(letterGroupsDefined,0,sizeof(letterGroupsDefined));

	debug_flag = flags & 1;

	if(dsource == NULL)
		dsource = "";

	f_log = log;
//f_log = fopen("log2.txt","w");
	if(f_log == NULL)
		f_log = stderr;

	// try with and without '.txt' extension
	// snprintf: dsource and dict_name come from the caller, so never write past the buffers
	snprintf(path,sizeof(path),"%s%s_",dsource,dict_name);
	snprintf(fname_in,sizeof(fname_in),"%srules.txt",path);
	if((f_in = fopen(fname_in,"r")) == NULL)
	{
		snprintf(fname_in,sizeof(fname_in),"%srules",path);
		if((f_in = fopen_log(fname_in,"r")) == NULL)
		{
			if(fname_err)
				strcpy(fname_err,fname_in);
			return(-1);
		}
	}

	snprintf(fname_out,sizeof(fname_out),"%s%c%s_dict",path_home,PATHSEP,dict_name);
	if((f_out = fopen_log(fname_out,"wb+")) == NULL)
	{
		// report the file which actually failed, and don't leak the input file
		if(fname_err)
			strcpy(fname_err,fname_out);
		fclose(f_in);
		return(-1);
	}
	snprintf(fname_temp,sizeof(fname_temp),"%s%ctemp",path_home,PATHSEP);

	// file header: hash table size, then the offset of the rules section
	// (a placeholder of zero for now, rewritten once the offset is known)
	value = N_HASH_DICT;
	Write4Bytes(f_out,value);
	Write4Bytes(f_out,offset_rules);

	compile_dictlist_start();

	fprintf(f_log,"Using phonemetable: '%s'\n",phoneme_tab_list[phoneme_tab_number].name);
	compile_dictlist_file(path,"roots");
	// langopts.listx selects the order in which "list" and "listx" are compiled
	if(translator->langopts.listx)
	{
		compile_dictlist_file(path,"list");
		compile_dictlist_file(path,"listx");
	}
	else
	{
		compile_dictlist_file(path,"listx");
		compile_dictlist_file(path,"list");
	}
	compile_dictlist_file(path,"extra");
	
	compile_dictlist_end(f_out);
	offset_rules = ftell(f_out);
	
	fprintf(f_log,"Compiling: '%s'\n",fname_in);

	if(compile_dictrules(f_in,f_out,fname_temp) != 0)
	{
		// the temporary file could not be created or re-opened, so the rules were not written
		fprintf(f_log,"Failed to compile rules using temporary file: '%s'\n",fname_temp);
		error_count++;
	}
	fclose(f_in);

	// go back and fill in the offset of the rules section
	fseek(f_out,4,SEEK_SET);
	Write4Bytes(f_out,offset_rules);
	fclose(f_out);

	LoadDictionary(translator, dict_name, 0);

	return(error_count);
}  //  end of compile_dictionary

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The BSD License

Written By

Jozef Bozek

CEO bring-it-together s.r.o.

Slovakia

Jozef Božek is currently a software engineer at bring-it-together s.r.o. in the area of large-scale information systems and mobile application development.
He has been developing in C++ nearly full time since 2000, in Java since 2004 and in Objective-C since 2009. He programs using the Java EE SDK, iOS SDK, COM/DCOM, MFC, ATL, STL and so on :)

ESpeakEngine - Objective-C speech synthesizer

License

Comments and Discussions