Implementing new model types

For convenience, AbstractFBCModels defines a "canonical" implementation of a FBC model: a completely generic data structure that can store exactly the complete information that is representable by the AbstractFBCModel accessors, and nothing else.

The type is not useful for actually constructing the models, but may serve a good purpose in several other cases:

  • If you need an "intermediate step" when converting complicated FBC models to other types, the canonical model is guaranteed not to lose any information, yet perform relatively well when re-exporting the information via the accessors.
  • If you need to make quick modifications to another model type that does not admin easy mutation (e.g., it is made of immutable structs), you can convert to the canonical model and make the small fixes in there.
  • Here, we use it for describing how to "perform" your own definition of model type, and demonstrate the use of the pre-defined testing framework on it.

The model is available for use as AbstractFBCModels.CanonicalModel.Model

Defining the model

For convenience in the later explanation, we list the whole definition of the module here:


module CanonicalModel

using DocStringExtensions

import ..AbstractFBCModels as A
import Serialization as S
import SparseArrays: sparse, findnz

"""
$(TYPEDEF)

A canonical Julia representation of a reaction in the `AbstractFBCModels` interface.

# Fields
$(TYPEDFIELDS)
"""
Base.@kwdef mutable struct Reaction
    name::A.Maybe{String} = nothing
    lower_bound::Float64 = -Inf
    upper_bound::Float64 = Inf
    stoichiometry::Dict{String,Float64} = Dict()
    objective_coefficient::Float64 = 0.0
    gene_association_dnf::A.Maybe{A.GeneAssociationDNF} = nothing
    annotations::A.Annotations = A.Annotations()
    notes::A.Notes = A.Notes()
end

Base.show(io::Base.IO, ::MIME"text/plain", x::Reaction) = A.pretty_print_kwdef(io, x)

"""
$(TYPEDEF)

A canonical Julia representation of a metabolite in the `AbstractFBCModels` interface.

# Fields
$(TYPEDFIELDS)
"""
Base.@kwdef mutable struct Metabolite
    name::A.Maybe{String} = nothing
    compartment::A.Maybe{String} = nothing
    formula::A.Maybe{A.MetaboliteFormula} = nothing
    charge::A.Maybe{Int} = nothing
    balance::Float64 = 0.0
    annotations::A.Annotations = A.Annotations()
    notes::A.Notes = A.Notes()
end

Base.show(io::Base.IO, ::MIME"text/plain", x::Metabolite) = A.pretty_print_kwdef(io, x)

"""
$(TYPEDEF)

A canonical Julia representation of a gene in the `AbstractFBCModels` interface.

# Fields
$(TYPEDFIELDS)
"""
Base.@kwdef mutable struct Gene
    name::A.Maybe{String} = nothing
    annotations::A.Annotations = A.Annotations()
    notes::A.Notes = A.Notes()
end

Base.show(io::Base.IO, ::MIME"text/plain", x::Gene) = A.pretty_print_kwdef(io, x)

"""
$(TYPEDEF)

A canonical Julia representation of a row in a coupling matrix of the
`AbstractFBCModels` interface.

# Fields
$(TYPEDFIELDS)
"""
Base.@kwdef mutable struct Coupling
    name::A.Maybe{String} = nothing
    reaction_weights::Dict{String,Float64} = Dict()
    lower_bound::Float64 = -Inf
    upper_bound::Float64 = Inf
    annotations::A.Annotations = A.Annotations()
    notes::A.Notes = A.Notes()
end

Base.show(io::Base.IO, ::MIME"text/plain", x::Coupling) = A.pretty_print_kwdef(io, x)

"""
$(TYPEDEF)

A canonical Julia representation of a metabolic model that sotres exactly the
data represented by `AbstractFBCModels` accessors.

The implementation is useful for manipulating model data manually without
writing new model types, or even for constructing models from base data in many
simple cases.

Additionally, you can use the implementation of accessors for this model type
in the source code of `AbstractFBCModels` as a starting point for creating an
`AbstractFBCModel` interface for your own models.

# Fields
$(TYPEDFIELDS)
"""
Base.@kwdef mutable struct Model <: A.AbstractFBCModel
    reactions::Dict{String,Reaction} = Dict()
    metabolites::Dict{String,Metabolite} = Dict()
    genes::Dict{String,Gene} = Dict()
    couplings::Dict{String,Coupling} = Dict()
end

Base.show(io::Base.IO, ::MIME"text/plain", x::Model) = A.pretty_print_kwdef(io, x)

A.reactions(m::Model) = sort(collect(keys(m.reactions)))
A.metabolites(m::Model) = sort(collect(keys(m.metabolites)))
A.genes(m::Model) = sort(collect(keys(m.genes)))
A.couplings(m::Model) = sort(collect(keys(m.couplings)))
A.n_reactions(m::Model) = length(m.reactions)
A.n_metabolites(m::Model) = length(m.metabolites)
A.n_genes(m::Model) = length(m.genes)
A.n_couplings(m::Model) = length(m.couplings)
A.reaction_name(m::Model, id::String) = m.reactions[id].name
A.metabolite_name(m::Model, id::String) = m.metabolites[id].name
A.gene_name(m::Model, id::String) = m.genes[id].name
A.coupling_name(m::Model, id::String) = m.couplings[id].name
A.reaction_annotations(m::Model, id::String) = m.reactions[id].annotations
A.metabolite_annotations(m::Model, id::String) = m.metabolites[id].annotations
A.gene_annotations(m::Model, id::String) = m.genes[id].annotations
A.coupling_annotations(m::Model, id::String) = m.couplings[id].annotations
A.reaction_notes(m::Model, id::String) = m.reactions[id].notes
A.metabolite_notes(m::Model, id::String) = m.metabolites[id].notes
A.gene_notes(m::Model, id::String) = m.genes[id].notes
A.coupling_notes(m::Model, id::String) = m.couplings[id].notes

function A.stoichiometry(m::Model)
    midxs = Dict(mid => idx for (idx, mid) in enumerate(A.metabolites(m)))
    I = Int[]
    J = Int[]
    V = Float64[]
    for (ridx, rid) in enumerate(A.reactions(m))
        for (smid, v) in m.reactions[rid].stoichiometry
            push!(I, midxs[smid])
            push!(J, ridx)
            push!(V, v)
        end
    end
    sparse(I, J, V, A.n_metabolites(m), A.n_reactions(m))
end

function A.coupling(m::Model)
    ridxs = Dict(rid => idx for (idx, rid) in enumerate(A.reactions(m)))
    I = Int[]
    J = Int[]
    V = Float64[]
    for (cidx, cid) in enumerate(A.couplings(m))
        for (rid, v) in m.couplings[cid].reaction_weights
            push!(I, cidx)
            push!(J, ridxs[rid])
            push!(V, v)
        end
    end
    sparse(I, J, V, A.n_couplings(m), A.n_reactions(m))
end

A.bounds(m::Model) = (
    [m.reactions[rid].lower_bound for rid in A.reactions(m)],
    [m.reactions[rid].upper_bound for rid in A.reactions(m)],
)

A.coupling_bounds(m::Model) = (
    [m.couplings[cid].lower_bound for cid in A.couplings(m)],
    [m.couplings[cid].upper_bound for cid in A.couplings(m)],
)

A.balance(m::Model) =
    sparse(Float64[m.metabolites[mid].balance for mid in A.metabolites(m)])
A.objective(m::Model) =
    sparse(Float64[m.reactions[rid].objective_coefficient for rid in A.reactions(m)])

A.reaction_gene_association_dnf(m::Model, id::String) = m.reactions[id].gene_association_dnf
A.reaction_gene_products_available(m::Model, id::String, fn::Function) =
    A.reaction_gene_products_available_from_dnf(m, id, fn)
A.reaction_stoichiometry(m::Model, id::String) = m.reactions[id].stoichiometry

A.metabolite_formula(m::Model, id::String) = m.metabolites[id].formula
A.metabolite_charge(m::Model, id::String) = m.metabolites[id].charge
A.metabolite_compartment(m::Model, id::String) = m.metabolites[id].compartment

A.coupling_weights(m::Model, id::String) = m.couplings[id].reaction_weights

A.load(::Type{Model}, path::String) = S.deserialize(path)
A.save(m::Model, path::String) = S.serialize(path, m)
A.filename_extensions(::Type{Model}) = ["canonical-serialized-fbc"]

function Base.convert(::Type{Model}, x::A.AbstractFBCModel)
    (lbs, ubs) = A.bounds(x)
    (clbs, cubs) = A.coupling_bounds(x)
    Model(
        reactions = Dict(
            r => Reaction(
                name = A.reaction_name(x, r),
                lower_bound = lb,
                upper_bound = ub,
                stoichiometry = A.reaction_stoichiometry(x, r),
                objective_coefficient = o,
                gene_association_dnf = A.reaction_gene_association_dnf(x, r),
                annotations = A.reaction_annotations(x, r),
                notes = A.reaction_notes(x, r),
            ) for (r, o, lb, ub) in zip(A.reactions(x), A.objective(x), lbs, ubs)
        ),
        metabolites = Dict(
            m => Metabolite(
                name = A.metabolite_name(x, m),
                balance = b,
                formula = A.metabolite_formula(x, m),
                charge = A.metabolite_charge(x, m),
                compartment = A.metabolite_compartment(x, m),
                annotations = A.metabolite_annotations(x, m),
                notes = A.metabolite_notes(x, m),
            ) for (m, b) in zip(A.metabolites(x), A.balance(x))
        ),
        genes = Dict(
            g => Gene(
                name = A.gene_name(x, g),
                annotations = A.gene_annotations(x, g),
                notes = A.gene_notes(x, g),
            ) for g in A.genes(x)
        ),
        couplings = Dict(
            c => Coupling(
                name = A.coupling_name(x, c),
                lower_bound = lb,
                upper_bound = ub,
                reaction_weights = A.coupling_weights(x, c),
                annotations = A.coupling_annotations(x, c),
                notes = A.coupling_notes(x, c),
            ) for (c, lb, ub) in zip(A.couplings(x), clbs, cubs)
        ),
    )
end

end # module CanonicalModel

The definition contains several main parts:

  • the data structures for the model and all the main parts
  • overloaded accessors that provide generic access for the things in the model
  • overloaded loading and saving functions, together with the declaration of the common model suffixes
  • a conversion function that can extract data using accessors from any other AbstractFBCModel and construct the canonical model.

Notably, the default file extension is chosen as very unwieldy so that no one ever really exchanges data using this model type.

Testing your model definition

Apart from making sure that the accessors work by usual unit tests, you can use 2 testing functions that scrutinize the expected properties of the model type both solely as a type, and using an example model file. These allow you to discover potential problems, as well as build a self-updating test suite for your model that provides long-term sustainability and quality assurance.

Running type-level tests

Typically, the test suite would run the following to check if types of everything match the expectations.

import AbstractFBCModels as A
import AbstractFBCModels.CanonicalModel: Model

A.run_fbcmodel_type_tests(Model);
Test Summary:                                                | Pass  Total  Time
Model type AbstractFBCModels.CanonicalModel.Model properties |   33     33  0.1s

Making a simple model for value tests

For testing the values, you need to provide an existing file that contains the model. Let's create some contents first:

import AbstractFBCModels.CanonicalModel: Reaction, Metabolite, Gene, Coupling

m = Model()
m.metabolites["m1"] = Metabolite(compartment = "inside")
m.metabolites["m2"] = Metabolite(compartment = "outside")
m.genes["g1"] = Gene()
m.genes["g2"] = Gene()
m.reactions["forward"] = Reaction(
    name = "import",
    stoichiometry = Dict("m1" => -1.0, "m2" => 1.0),
    gene_association_dnf = [["g1"], ["g2"]],
    objective_coefficient = 1.0,
)
m.reactions["and_back"] =
    Reaction(name = "export", stoichiometry = Dict("m2" => -1.0, "m1" => 1.0))
m.reactions["exchange1"] = Reaction(
    name = "exchange m1",
    stoichiometry = Dict("m1" => -1.0),
    gene_association_dnf = [[]], # DNF encoding of a reaction that requires no gene products
)
m.reactions["exchange2"] = Reaction(
    name = "exchange m2",
    stoichiometry = Dict("m2" => -1.0),
    gene_association_dnf = [], # DNF encoding of a reaction that never has gene products available
)
m.couplings["total_exchange_limit"] = Coupling(
    lower_bound = 0,
    upper_bound = 10,
    reaction_weights = Dict("exchange$i" => 1.0 for i = 1:2),
)

We should immediately find the basic accessors working:

A.stoichiometry(m)
2×4 SparseArrays.SparseMatrixCSC{Float64, Int64} with 6 stored entries:
  1.0  -1.0    ⋅   -1.0
 -1.0    ⋅   -1.0   1.0
A.objective(m)
4-element SparseArrays.SparseVector{Float64, Int64} with 1 stored entry:
  [4]  =  1.0
A.coupling(m)
1×4 SparseArrays.SparseMatrixCSC{Float64, Int64} with 2 stored entries:
  ⋅   1.0  1.0   ⋅ 

We can check various side things, such as which reactions would and would not work given all gene products disappear:

products_available = [
    A.reaction_gene_products_available(m, rid, _ -> false) for
    rid in ["forward", "and_back", "exchange1", "exchange2"]
]
4-element Vector{Union{Nothing, Bool}}:
 0
  nothing
 1
 0

We can now also write the model to disk and try to load it with the default loading function:

mktempdir() do dir
    path = joinpath(dir, "model.canonical-serialized-fbc")
    A.save(m, path)
    A.load(path)
end
AbstractFBCModels.CanonicalModel.Model(
  reactions = Dict{String, AbstractFBCModels.CanonicalModel.Reaction}("forward"…
  metabolites = Dict{String, AbstractFBCModels.CanonicalModel.Metabolite}("m2" …
  genes = Dict{String, AbstractFBCModels.CanonicalModel.Gene}("g2" => AbstractF…
  couplings = Dict{String, AbstractFBCModels.CanonicalModel.Coupling}("total_ex…
)

Running the value tests

Given the data, value tests have an opportunity to scrutinize much greater amount of properties of the model implementation.

Running the tests requires a model type and an "example" model file:

mktempdir() do dir
    path = joinpath(dir, "model.canonical-serialized-fbc")
    A.save(m, path)
    A.run_fbcmodel_file_tests(Model, path, name = "small model")
end;
Test Summary:                                                      | Pass  Total  Time
Model `small model' of type AbstractFBCModels.CanonicalModel.Model |   69     69  0.1s

This page was generated using Literate.jl.