// Download the DNA Library Designer (DNALD)
// This is a single line comment. Everything up to the end of the line is
// ignored. (This is another single line comment.)
# This is also a single line comment, if you prefer using a single hash.
/* This is a multi-line comment. Anything up to and including the closing
* symbol (asterisk and slash together) is ignored, which means that they can
* also be used to add comments within an interpreted line (see below).
*
* The preceding * are optional but inserted automatically for clarity.
*/
/**
* This is a documentation comment (or docstring), which is also a multi-line
* comment, distinguished by the second *, which will be associated with the
* following section.
*
* Libraries are the main objects of DNALD. They allow you to specify a set of
* output DNA sequences, for synthesis, as combinations of existing ("natural")
* input DNA fragments and new ("synthetic") intermediate DNA fragments, using
* definitions and expressions of definitions, sequences and operations.
*
* This example demonstrates the structure and syntax of a DNALD DNA library
* design, the available operations and their syntax and semantics.
*
*/
library tutorial {
/*
* The symbolic keyword "library" MUST be followed by a name, which is used to
* create a namespace for the definitions within. The beginning and end of a
* library is denoted by an open curly brace and matching closing curly brace
* after the final definition.
*
* DNALD names must match the following pattern:
* '^'?('a'..'z'|'A'..'Z'|'_'|"5'"|"3'") ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
* which can be interpreted as: an optional hat character (used to distinguish
* names that are also keywords); followed by either a lower or upper case
* letter, an underscore or a 5' or 3' character pair; followed by zero or
* more alphanumerics and underscores.
*/
/**
* The symbolic keyword "inputs" denotes the inputs section which again
* begins and ends with open and closed curly braces respectively.
*
* Within the inputs section definitions of sequences are specified,
* corresponding to natural DNA fragments, that can be provided by you,
* the library designer, to the library manufacturer.
*/
inputs {
input1 := 'acgt'
/**
* Definitions begin with a name satisfying the rules above, followed by
* the definition symbol := and a DNALD expression (examples of which are
* shown below).
*
* Sequences MUST be quoted using a pair of either 'single' or "double"
* speech marks.
* Sequences can include numbers and whitespace (spaces, tabs or
* end-of-line characters), which will be stripped automatically. This
* enables cut and paste from the sequence sections of various formats.
* Each letter in a sequence MUST be one of 'a', 'c', 'g' or 't'. // NOT currently enforced
* Lowercase and uppercase letters are allowed, as are a mix of cases.
*/
input2 := 'atcgt'
} # inputs end
/**
* Intermediates are definitions in the library but NOT in the input
* or output sections.
*/
# synthetic sequences
5'end := 'atg'
# only 'a', 'c', 'g' and 't' are valid sequence letters (see the rules in the
# inputs section), so thymine 't' is used here rather than uracil 'u'
3'end := 'tta'
# sequences (the results of expressions) are concatenated using only whitespace
concatenation := 5'end input1
# the "is" keyword asserts that the expression following it is equivalent
assertion := concatenation
is 5'end 'acgt' # you can use assertions when refactoring
is 'atg' input1 # to check refactored expressions
is 'atg' 'acgt' # against previous correct ones
reference := input1
# subsequences
nucleotide := input1[2] is 'c' # the second nucleotide of input1
subsequence := input1[2:3] is 'cg'
from_i_to_end := input1[2:end] is 'cgt' # use end for the rest of the sequence
# insertions
insertion_with_concatenation := input1[1] 'AAA' input1[4:end]
insertion_with_mutation := input1[2 = 'tc']
is 'atcgt'
is input2
# reverse, complement and their composition
# ('acca' reads the same in both directions, so its reverse is itself)
^reverse := reverse('acca')
is 'acca'
^complement := complement('acca')
is 'tggt'
# the hat symbol ^ enables use of a keyword as a name
/**
* reverse complements are special because they are the other strand
* of their own reverse complement
*/
reverse_complement := reverse(complement('acca'))
is complement(reverse('acca'))
is 'tggt'
# repetitions
repetition1 := 'acg' * 2 is 'acgacg'
repetition2 := 2 * 'acg' is 'acgacg'
# the order of repetitions is irrelevant for single expressions
repetitions := 3 * 'ac' * 2
is 2 * 'ac' * 3
# the order of repetitions is relevant for multiple expressions
repetitions_multiple
:= 3 * ac GT * 2
is 2 * ac GT * 3 # warning: this assertion does not hold
# parentheses can be used to wrap multiple expressions into a single expression
repetitions_multiple_parenthesised
:= 3 * (ac GT) * 2
is 2 * (ac GT) * 3
# different coloured fragments used by repetitions_multiple* above
ac := 'ac'
GT := 'GT'
variable_length_repetition := 'at' * 2:3 is 'atat' + 'atatat'
# mutations, deletions and insertions
mutation := input1[2:3='AA'] // an expression which replaces a mutated subsequence
deletion := input1[2:3='A'] // when the mutation is shorter than the subsequence
insertion := input1[2:3='AAA'] // when the mutation is longer than the subsequence
# multiple mutations
SNP := input1[4='A'] is 'acgA'
SNPs := input1[2='',4='A'] is 'agA' // multiple mutations are applied simultaneously to preserve indices
sequential_SNPs := input1[2=''][3='A'] is 'agA'// OK but sequential mutations do not preserve indices
// sequential_SNPs2 := input1[2=''][4='A'] // not OK because sequential mutations do not preserve indices
// creating sets of sequences
// "+" or "union" creates a set of sequences
union1 := input1 + input2 is (input1 + input2) // sometimes parentheses are optional
is input2 + input1 is (input2 + input1) // sometimes (with other set operations) they are not
is (input1 + input2) is (input2 + input1) // parentheses help delineate groups
// a choice also creates a set but implies that only one sequence is required
choice := input1 | input2
is input2 | input1
// mutation with a set of sequences
three_mutations := input1[2:3='AA' + 'A' + 'AAA']
is mutation + deletion + insertion
// order of set operations
union2 := concatenation + input1
is input1 + concatenation // order is irrelevant
is 'atgacgt' + 'acgt'
intersection := union1 & union2
is union2 & union1 // order is irrelevant
is input1
symmetric_difference := union1 ^ union2
is union2 ^ union1 // order is irrelevant
is concatenation + input2
is 'atgacgt' + 'atcgt'
difference1 := union1 - union2
is union2 - union1 // order is relevant, so this assertion is expected to warn
is input2
is 'atcgt'
difference2 := union2 - union1
is union1 - union2 // order is relevant, so this assertion is expected to warn
is concatenation
is 'atg' 'acgt'
is 'atgacgt'
/**
* The outputs section is similar to the inputs section, except that
* at least one output must be defined.
*
* These are the definitions that will be manufactured from the
* library design.
*/
outputs {
/**
* Now it's your turn. Any valid expression combining those definitions
* above will appear at the bottom of the library view below.
*/
output1 := ''
} # outputs end
}
/**
* Codon tables are used to translate nucleotide sequences and back-translate
* amino acid sequences.
*
* The default codon table is the E. coli standard genetic code from
* http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=37762&aa=1&style=N
* based on 8087 CDS's (2,330,943 codons)
*/
Codon Table DefaultCodonTable {
/*
* A set of codons is specified by either the full name or three letter code
* of an amino acid, followed by the definition symbol := and a series of
* codons separated by the choice symbol |.
* Codons MUST be specified for each of the 20 standard amino acids and the
* untranslatable stop codons.
* Stop codons are named "stop" or "Ter".
* Alternate start codons are named "start".
*/
Alanine /* A */ := 'GCA' 0.27 | 'GCC' 0.26 | 'GCG' 0.25 | 'GCT' #0.22
Arginine /* R */ := 'CGT' 0.30 | 'CGC' 0.26 | 'CGG' 0.15 | 'AGA' 0.13 | 'CGA' 0.09 | 'AGG' #0.07
// Columns: full name or three letter code; codon (quoted); weight; choice symbol; redundant weight (commented out) when sum < 1
Asn /* Asparagine, N */ := 'AAT' 0.59 | 'AAC' #0.41
Aspartic acid /* D */ := 'GAT' 0.65 | 'GAC' #0.35
Cysteine /* C */ := 'TGT' 0.52 | 'TGC' #0.48
Glutamic acid /* E */ := 'GAA' 0.64 | 'GAG' #0.36
Gln /* Glutamine, Q */ := 'CAG' 0.65 | 'CAA' #0.35
Glycine /* G */ := 'GGT' 0.34 | 'GGC' 0.29 | 'GGA' 0.19 | 'GGG' #0.18
Histidine /* H */ := 'CAT' 0.63 | 'CAC' 0.37
Ile /* I */ := 'ATT' 0.47 | 'ATC' 0.31 | 'ATA' 0.21 # Isoleucine
Leucine /* L */ := 'CTG' 0.38 | 'TTA' 0.18 | 'CTT' 0.15 | 'TTG' 0.13 | 'CTC' 0.10 | 'CTA' 0.06
Lysine /* K */ := 'AAA' 0.71 | 'AAG' #0.29
Methionine /* M */ := 'ATG' #1.00
Phenylalanine /* F */ := 'TTT' 0.64 | 'TTC' #0.36
Proline /* P */ := 'CCG' 0.37 | 'CCT' 0.24 | 'CCA' 0.23 | 'CCC' #0.16
// Pyl /* O */ := '' // Pyrrolysine
// Sec /* U */ := '' // Selenocysteine
Serine /* S */ := 'AGC' 0.20 | 'AGT' 0.18 | 'TCA' 0.18 | 'TCT' 0.18 | 'TCC' 0.14 | 'TCG' #0.11
Ter /*stop*/ /* * */ := 'TAA' 0.58 | 'TGA' 0.33 | 'TAG' #0.09 # stop
// the two unweighted threonine codons share the remainder (1 - 0.31 - 0.25) equally
Threonine /* T */ := 'ACC' 0.31 | 'ACA' 0.25 | 'ACG' | 'ACT' #0.22 (each)
Trp /* W */ := 'TGG' #1.00
Tyrosine /* Y */ := 'TAT' 0.65 | 'TAC' #0.35
Valine /* V */ := 'GTT' 0.32 | 'GTG' 0.29 | 'GTA' 0.19 | 'GTC' #0.19
/*
* Numeric weights following the codon sequence are used to probabilistically
* choose a given codon when back-translating. They correspond to the
* determined (or desired) codon usage frequencies of the target organism.
* Weights are optional if there are no weights given for that amino acid or if
* the weights that are given sum to less than 1.
* The remainder (1 if no weights are given) is divided equally between the
* unweighted codons. For example, in the default codon table the weights of
* the last two codons of threonine can be commented out because their weights
* are equal and the sum of those is equal to the remainder of 1 minus the sum
* of the weights of the first two codons.
* Weights may be greater than 1 and can sum to greater than 1, however, in
* which case ALL of the codons must be given weights, because it is not
* possible to determine the remainder otherwise.
*
* Not every codon has to be used, but those that are must be unique.
* Not using a particular codon is another way of biasing codon choice for
* back-translated sequences. However, translation in DNALD also relies on
* codon tables and an error will be flagged if a triplet in a reading frame
* cannot be matched to an amino acid. Therefore it may be best to specify all
* codons but give those that are unwanted a weight of 0.
*/
}