BeckResearchLab · NeerajPise · May 12, 2020 · May 12, 2020
diff --git a/smdt/descriptors/kappa.py b/smdt/descriptors/kappa.py
@@ -1,11 +1,23 @@
 
 from rdkit import Chem
 from rdkit.Chem import rdchem
-from rdkit.Chem import pyPeriodicTable as PeriodicTable
-periodicTable = rdchem.GetPeriodicTable()
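+#from rdkit.Chem import pyPeriodicTable as PeriodicTable  # disabled: radii now come from periodicTable.GetRb0() and the alpha table is the local hallKierAlphas dict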
+periodicTable = Chem.GetPeriodicTable()
 import pandas as pd
 
 
+hallKierAlphas = {'Br': [None, None, 0.48],  # Hall-Kier alpha values keyed by element symbol; looked up with atom.GetSymbol() in _HallKierAlpha
+                  'C': [-0.22, -0.13, 0.0],  # list position presumably corresponds to atom.GetHybridization() - 2 -- TODO confirm against the full _HallKierAlpha body
+                  'Cl': [None, None, 0.29],  # None: no value listed for that position; how None is handled is not visible in this hunk
+                  'F': [None, None, -0.07],
+                  'H': [0.0, 0.0, 0.0],
+                  'I': [None, None, 0.73],
+                  'N': [-0.29, -0.2, -0.04],
+                  'O': [None, -0.2, -0.04],
+                  'P': [None, 0.3, 0.43],
+                  'S': [None, 0.22, 0.35]}  # symbols absent from this table fall back to rA / rC - 1 using periodicTable.GetRb0()
+
+
 def CalculateKappa1(mol):
     """
     Calculation of molecular shape index for one bonded fragment
@@ -59,12 +71,12 @@ def _HallKierAlpha(mol):
     Calculation of the Hall-Kier alpha value for a molecule
     """
     alphaSum = 0.0
-    rC = PeriodicTable.nameTable['C'][5]
+    rC = periodicTable.GetRb0(6)
     for atom in mol.GetAtoms():
         atNum = atom.GetAtomicNum()
         if not atNum: continue
         symb = atom.GetSymbol()
-        alphaV = PeriodicTable.hallKierAlphas.get(symb, None)
+        alphaV = hallKierAlphas.get(symb, None)
         if alphaV is not None:
             hyb = atom.GetHybridization() - 2
             if hyb < len(alphaV):
@@ -74,7 +86,7 @@ def _HallKierAlpha(mol):
             else:
                 alpha = alphaV[-1]
         else:
-            rA = PeriodicTable.nameTable[symb][5]
+            rA = periodicTable.GetRb0(atNum)
             alpha = rA / rC - 1
         alphaSum += alpha
     return alphaSum

diff --git a/smdt/molecular_descriptors.py b/smdt/molecular_descriptors.py
@@ -112,7 +112,7 @@
         'PEOEVSA10', 'PEOEVSA11', 'PEOEVSA12', 'PEOEVSA13', 'EstateVSA0', 'EstateVSA1', 'EstateVSA2',
         'EstateVSA3', 'EstateVSA4', 'EstateVSA5', 'EstateVSA6', 'EstateVSA7', 'EstateVSA8', 'EstateVSA9',
         'EstateVSA10', 'VSAEstate0', 'VSAEstate1', 'VSAEstate2', 'VSAEstate3', 'VSAEstate4', 'VSAEstate5',
-        'VSAEstate6', 'VSAEstate7', 'VSAEstate8', 'VSAEstate9', 'VSAEstate10']
+        'VSAEstate6', 'VSAEstate7', 'VSAEstate8', 'VSAEstate9']
 
 _moran = ['MATSm1', 'MATSm2', 'MATSm3', 'MATSm4', 'MATSm5', 'MATSm6', 'MATSm7', 'MATSm8', 'MATSv1',
           'MATSv2', 'MATSv3', 'MATSv4', 'MATSv5', 'MATSv6', 'MATSv7', 'MATSv8', 'MATSe1', 'MATSe2',
@@ -162,6 +162,7 @@ def getAllDescriptorsforMol(mol):
                    'geary': _geary, 'kappa': _kappa, 'moe': _moe, 'moran': _moran, 'moreaubroto': _moreaubroto}
 
 
+
 def getDescriptors(data, descriptor_type = 'topology'):
     smiles, target = utils.descriptor_target_split(data)
     cols = descriptor_list[descriptor_type]
@@ -180,11 +181,18 @@ def getAllDescriptors(data):
     smiles, target = utils.descriptor_target_split(data)
     cols = _topology + _constitutional + _bcut + _basak + _cats2d + _charge + _connectivity + _estate + _geary + _kappa + _moe + _moran + _moreaubroto
     AllDescriptors = pd.DataFrame(columns=cols)
+    ignore = ['Ta', 'Nb', 'Os', 'Y', 'Ir', 'Re', 'Ba', 'Ac', 'Ti', 'U','V','Hf', 'La', 'Nd', 'Eu', 'Dy', 'Ce', 'Sm', 'Pd', 'Zr', 'Ru', 'W', 'Rh', 'Er', 'Th']
     print('\nCalculating Molecular Descriptors...')
-    for i in range(len(smiles)):
-        print('Row %d out of %d' % (i + 1, len(smiles)), end='')
-        print('\r', end='')
-        AllDescriptors.loc[i] = getAllDescriptorsforMol(Chem.MolFromSmiles(smiles['SMILES'][i]))
+    for i in range(0,len(data)):
+        break_counter = 0;
+        for j in ignore:
+            if j in smiles['SMILES'][i]:
+                break_counter = 1;
+
+        if break_counter == 0:     
+            print('Row %d out of %d' % (i + 1, len(smiles)), end='')
+            print('\r', end='') 
+            AllDescriptors.loc[i] = getAllDescriptorsforMol(Chem.MolFromSmiles(smiles['SMILES'][i]))
     final_df = utils.descriptor_target_join(AllDescriptors, target)
     print('\nCalculating Molecular Descriptors Completed.')
     return final_df