Molecules¶
Properties, atom/bond access, building, and stereochemistry.
Molecular Formula and Mass¶
from chython import smiles
mol = smiles('CC(=O)Oc1ccccc1C(=O)O') # aspirin
mol.brutto # {'C': 9, 'H': 8, 'O': 4}
mol.brutto_formula # 'C9H8O4'
mol.brutto_formula_html # 'C<sub>9</sub>H<sub>8</sub>O<sub>4</sub>'
mol.molecular_mass # 180.159... (average atomic masses)
mol.molecular_charge # 0 (total formal charge)
Drug-Likeness Descriptors¶
mol = smiles('CC(=O)Oc1ccccc1C(=O)O')
mol.hydrogen_bond_donors_count # N/O/S with H
mol.hydrogen_bond_acceptors_count # O/N/S with lone pairs
mol.rotatable_bonds_count # non-ring single bonds (excludes amide-like)
mol.carbon_count
mol.carbon_sp3_count
mol.carbon_sp3_fraction # sp3 carbons / total carbons
Ring Properties¶
mol = smiles('c1ccc2ccccc2c1') # naphthalene
mol.sssr # list of smallest rings as tuples of atom numbers
mol.rings_count # number of SSSR rings
mol.atoms_rings # dict: atom_number -> list of ring tuples
mol.atoms_rings_sizes # dict: atom_number -> set of ring sizes
mol.aromatic_rings # tuple of aromatic ring atom tuples
Other Properties¶
mol.atoms_count # heavy atoms only
mol.bonds_count
mol.is_radical # True if any atom is a radical
Iterating Atoms and Bonds¶
mol = smiles('CCO')
# Iterate atom numbers
for n in mol:
    print(n)
# Iterate (atom_number, atom_object) pairs
for n, atom in mol.atoms():
    print(n, atom.atomic_symbol, atom.atomic_number)
# Iterate (n, m, bond) triples
for n, m, bond in mol.bonds():
    print(n, m, int(bond)) # int(bond) = bond order: 1,2,3,4(aromatic)
# Connected components
components = mol.connected_components # list of sets of atom numbers
Single Atom / Bond Access¶
atom = mol.atom(1) # get atom by number
bond = mol.bond(1, 2) # get bond between atoms 1 and 2
mol.has_atom(1) # True/False
mol.has_bond(1, 2) # True/False
Atom Properties¶
atom = mol.atom(1)
atom.atomic_symbol # 'C', 'N', 'O', etc.
atom.atomic_number # 6, 7, 8, etc.
atom.atomic_mass # average atomic mass (float)
atom.isotope # isotope number or None
atom.charge # formal charge (int)
atom.is_radical # bool
atom.implicit_hydrogens # count of implicit H (int or None)
atom.explicit_hydrogens # count of explicit H neighbors
atom.neighbors # count of non-H neighbors
atom.hybridization # 1=sp3, 2=sp2, 3=sp, 4=aromatic
atom.heteroatoms # count of non-C, non-H neighbors
atom.ring_sizes # set of ring sizes containing this atom
atom.x, atom.y # 2D coordinates
atom.xy # Vector(x, y) - supports tuple unpacking
Atom Neighbors / Environment¶
atom_num = 1 # any atom number present in mol
# Full environment: (neighbor_num, bond, neighbor_atom)
for n, bond, neighbor in mol.environment(atom_num):
    print(n, int(bond), neighbor.atomic_symbol)
# Just neighbor numbers
for n in mol.environment(atom_num, include_bond=False, include_atom=False):
    print(n)
# (neighbor_num, bond) pairs
for n, bond in mol.environment(atom_num, include_atom=False):
    print(n, int(bond))
Adjacency Matrix¶
import numpy as np
adj = mol.adjacency_matrix() # 0/1 matrix
adj = mol.adjacency_matrix(True) # bond orders as values
Building Molecules¶
from chython import MoleculeContainer
mol = MoleculeContainer()
# Add atoms (returns atom number)
n1 = mol.add_atom('C') # from symbol
n2 = mol.add_atom('C')
n3 = mol.add_atom(8) # from atomic number (oxygen)
# Add bonds (bond order: 1=single, 2=double, 3=triple, 4=aromatic)
mol.add_bond(n1, n2, 1)
mol.add_bond(n2, n3, 2)
print(str(mol)) # CC=O
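# Assign specific atom numbers (built in a separate container so that
# `mol` and n1..n3 from the example above remain valid below)
mol2 = MoleculeContainer()
mol2.add_atom('C', n=10) # atom number 10
mol2.add_atom('O', n=20)
mol2.add_bond(10, 20, 1)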
# Delete atom/bond
mol.delete_bond(n2, n3)
mol.delete_atom(n3)
# Batch modifications (defer recalculation for performance)
n4 = mol.add_atom('N', _skip_calculation=True)
mol.add_bond(n2, n4, 1, _skip_calculation=True)
mol.fix_structure() # recalculate everything once
Merging and Splitting¶
from chython import smiles
# Split disconnected components
anion, cation = smiles('[Cl-].[Na+]').split()
print(anion, cation)
# Merge molecules (union)
salt = anion | cation
salt = anion.union(cation, remap=True) # fix atom number overlap
# Extract substructure by atom numbers
toluene = smiles('Cc1ccccc1')
ring = toluene.substructure([2, 3, 4, 5, 6, 7])
# Substructure with neighbors (1 bond deep)
aug = toluene.augmented_substructure([2], deep=1)
# Remap atom numbers (in-place; use copy() first to keep original)
mol_copy = toluene.copy()
mol_copy.remap({1: 10, 2: 20})
# Copy
mol_copy = mol.copy()
Stereochemistry¶
Inspecting¶
mol = smiles('C/C=C/C') # trans-2-butene
mol.stereogenic_tetrahedrons # dict: atom -> neighbors tuple
mol.stereogenic_allenes # dict: atom -> neighbors tuple
mol.stereogenic_cis_trans # dict: (n, m) bond -> substituents tuple
mol.chiral_tetrahedrons # set of atoms with assigned tetrahedral stereo
mol.chiral_cis_trans # set of bonds with assigned cis/trans stereo
Setting¶
mol = smiles('CC(O)F')
# Add tetrahedral stereo
# env = neighbor atom numbers defining chirality order
# mark = True (counterclockwise / S) or False (clockwise / R)
mol.add_atom_stereo(n=2, env=(1, 3, 4), mark=True)
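# Add cis/trans stereo to double bond
# (needs a molecule with a double bond; CC(O)F above has none)
mol = smiles('CC=CC') # atoms 1-4, double bond between 2 and 3
# n, m = double bond atoms; n1, n2 = substituents
# mark = True (cis) or False (trans)
mol.add_cis_trans_stereo(n=2, m=3, n1=1, n2=4, mark=False)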
# Auto-detect cis/trans from 2D coordinates
mol.calculate_cis_trans_from_2d()
# Wedge/hash bond indicators
mol.add_wedge(n=1, m=2, mark=1) # 1 = wedge, -1 = hash
# Clear all stereo
mol.clean_stereo()
# Recalculate stereo from current state
mol.fix_stereo()
Hashing and Comparison¶
Molecules are hashable and comparable via canonical SMILES:
mol1 = smiles('CCO')
mol2 = smiles('OCC')
mol1 == mol2 # True (same canonical SMILES)
hash(mol1) == hash(mol2) # True
# Use in sets and dicts
unique = {smiles('CCO'), smiles('OCC'), smiles('c1ccccc1')}
len(unique) # 2
# Cryptographic hash (SHA-512 based)
sig = bytes(mol1)
Warning: Avoid modifying molecules (standardize, aromatize, add/remove atoms) after placing them in sets or dicts. The hash will change and lookups will break.