BibTeX entries in bib/cstr.bib

@MISC{cstr:www,
  key           = {{\relax CSTR}},
  title         = {{Centre for Speech Technology Research, University of Edinburgh}},
  howpublished  = {WWW page},
  note          = {\url{http://www.cstr.ed.ac.uk/}},
  year          = {1999},
  url           = {http://www.cstr.ed.ac.uk/},
  pub-url       = {http://www.cstr.ed.ac.uk/projects/festival/papers.html},
}
@INPROCEEDINGS{cstr:unitsel96,
  author        = {Hunt, A. J. and Black, A. W.},
  title         = {Unit Selection in a Concatenative Speech Synthesis System using a Large Speech Database},
  booktitle     = {Proc. {ICASSP} '96},
  address       = {Atlanta, GA},
  month         = may,
  year          = {1996},
  pages         = {373--376},
  note          = {\bibcstrurl\ \bibfilename{cstr/Black\_1996\_a.s}},
  remarks       = {cited in \cite{cslu:esca98mm}},
  abstract      = {One approach to the generation of natural-sounding synthesized speech waveforms is to select and concatenate units from a large speech database. Units (in the current work, phonemes) are selected to produce a natural realisation of a target phoneme sequence predicted from text which is annotated with prosodic and phonetic context information. We propose that the units in a synthesis database can be considered as a state transition network in which the state occupancy cost is the distance between a database unit and a target, and the transition cost is an estimate of the quality of concatenation of two consecutive units. This framework has many similarities to HMM-based speech recognition. A pruned Viterbi search is used to select the best units for synthesis from the database. This approach to waveform synthesis permits training from natural speech: two meth ods for training from speech are presented which provide weights which produce more natural speech than can be obtained by handtuning.},
}
@INPROCEEDINGS{cstr:unitsel97,
  author        = {Black, Alan W. and Taylor, Paul},
  title         = {Automatically Clustering Similar Units for Unit Selection in Speech Synthesis},
  booktitle     = {Proc. Eurospeech '97},
  address       = {Rhodes, Greece},
  month         = sep,
  year          = {1997},
  pages         = {601--604},
  note          = {\bibcstrurl\ \bibfilename{cstr/Black\_1997\_b}},
  remarks       = {cited in \cite{cslu:esca98mm}: clustering and decision trees},
  abstract      = {This paper describes a new method for synthesizing speech by concatenating sub-word units from a database of labelled speech. A large unit inventory is created by automatically clustering units of the same phone class based on their phonetic and prosodic context. The appropriate cluster is then selected for a target unit offering a small set of candidate units. An optimal path is found through the candidate units based on their distance from the cluster center and an acoustically based join cost. Details of the method and justification are presented. The results of experiments using two different databases are given, optimising various parameters within the system. Also a comparison with other existing selection based synthesis techniques is given showing the advantages this method has over existing ones. The method is implemented within a full text-to-speech system offering efficient natural sounding speech synthesis.},
}
@INPROCEEDINGS{cstr:eursp95,
  author        = {Black, A. W. and Campbell, N.},
  title         = {Optimising selection of units from speech databases for concatenative synthesis},
  booktitle     = {Proc. Eurospeech '95},
  volume        = {1},
  address       = {Madrid, Spain},
  month         = sep,
  year          = {1995},
  pages         = {581--584},
  remarks       = {Summary: Detailed description of unit selection model, used features and context, concatenation join point optimisation. Description of weight optimising procedure: Euclidean cepstral distance (very limited first attempt) on real-speech test sentences. Unit selection as used in CHATR. cited in \cite{cslu:esca98mm}},
}
@INPROCEEDINGS{cstr:ssml97,
  author        = {Sproat, Richard and Taylor, Paul and Tanenblatt, Michael and Isard, Amy},
  title         = {A Markup Language for Text-To-Speech Synthesis},
  booktitle     = {Proc. Eurospeech '97},
  address       = {Rhodes, Greece},
  month         = sep,
  year          = {1997},
  pages         = {1747--1750},
  note          = {\bibcstrurl\ \bibfilename{cstr/Sproat\_1997\_a}},
  abstract      = {Text-to-speech synthesizers must process text, and therefore require some knowledge of text structure. While many TTS systems allow for user control by means of ad hoc `escape sequences', there remains to date no adequate and generally agreed upon system-independent standard for marking up text for the purposes of synthesis. The present paper is a collaborative effort between two speech groups aimed at producing such a standard, in the form of an SGML-based markup language that we call STML --- Spoken Text Markup Language. The primary purpose of this paper is not to present STML as a fait accompli, but rather to interest other TTS research groups to collaborate and contribute to the development of this standard.},
}
@TECHREPORT{cstr:festival97,
  author        = {Black, Alan and Taylor, Paul},
  title         = {{The Festival Speech Synthesis System: System Documentation (1.1.1)}},
  institution   = {Human Communication Research Centre},
  type          = {Technical Report},
  number        = {HCRC/TR-83},
  month         = jan,
  year          = {1997},
  pages         = {154},
  note          = {\bibcstrurl},
  url           = {http://www.cstr.ed.ac.uk/projects/festival/manual-1.1.1/festival-1.1.1.ps.gz},
  remarks       = {new version \cite{cstr:festival98}},
}
@TECHREPORT{cstr:festival98,
  author        = {Black, Alan and Taylor, Paul and Caley, Richard},
  title         = {{The Festival Speech Synthesis System: System Documentation (1.3.1)}},
  institution   = {Human Communication Research Centre},
  type          = {Technical Report},
  number        = {HCRC/TR-83},
  month         = dec,
  year          = {1998},
  pages         = {202},
  note          = {\bibcstrurl},
  url           = {http://www.cstr.ed.ac.uk/projects/festival/manual-1.3.1/festival_toc.html},
  remarks       = {updated version of \cite{cstr:festival97}, new utterance structure as in \cite{cstr:festivalarch98}, multiple synthesizers},
}
@MISC{cstr:festivalarch98,
  author        = {Taylor, Paul},
  title         = {{The Festival Speech Architecture}},
  howpublished  = {Web page},
  year          = {1999},
  note          = {\bibcstrurl},
  url           = {http://www.cstr.ed.ac.uk/projects/festival/arch.html},
  abstract      = {This is a short document describing the way we represent speech and linguistic structures in Festival. There are three main types of structure: \begin{description} \item [Items] An item is a single linguistic unit, such as a phone, word, syllable, syntactic node, intonation phrase etc. Each item has a set of features which describe its local properties. For instance a word could have features, , , ... Values of features can be real values or functions. \item [Relations] A relation links together items of a common linguistic type. For instance there we might have a word, phone, syntax or syllable relation. Relations are general graph structures, the most common type being a simple doubly linked list. Eg. the word relation is a doubly linked list that links all the words in an utterance in the order they occur in. Relations can also take the form of trees. For example, we have a syllable structure relation which gives onset, coda, nucleus and rhyme structure for a syllable. The crucial aspect of the Festival architecture is that items can be in more than one relation. For example, a syntax relation is a tree whose terminal elements are words, which are also in the word relation. \item [Utterances] Utterances contain a list of all the relations. \end{description}},
}
@INPROCEEDINGS{Campbell_FactAffe_EURO97,
  author        = {Campbell, Nick and Itoh, Yoshiharu and Ding, Wen and Higuchi, Norio},
  title         = {Factors Affecting Perceived Quality and Intelligibility in the {CHATR} Concatenative Speech Synthesiser},
  booktitle     = {Proc. Eurospeech '97},
  address       = {Rhodes, Greece},
  month         = sep,
  year          = {1997},
  pages         = {2635--2638},
  remarks       = {\tbf},
}
@INPROCEEDINGS{Campbell_CHATR,
  author        = {Campbell, N.},
  title         = {{CHATR}: {A} High-Definition Speech Re-Sequencing System},
  booktitle     = {Acoustical Society of America and Acoustical Society of Japan, Third Joint Meeting},
  address       = {Honolulu, HI},
  month         = dec,
  year          = {1996},
  remarks       = {\tbf},
}

This document was translated from LaTeX by HeVeA.