# Example:
import re

# A single pattern captures the chromosome name and both coordinates from the
# SAME match, so the three returned values always belong together.
_CHR_COORDINATES_RE = re.compile(
    r'(?P<chrom>chr[0-9a-zA-Z_]*):(?P<start>[0-9]+)-(?P<end>[0-9]+)'
)


class _CoordinatesNotFoundError(ValueError, AttributeError):
    """Raised when a string holds no "chrN:start-end" coordinates.

    It derives from AttributeError as well as ValueError because earlier
    versions of the parser failed with a bare AttributeError on such input;
    callers that catch AttributeError therefore keep working.
    """


def get_chr_coordinates_from_string(s):
    """Return the chromosome coordinates found in a given string.

    The coordinates have to be written in "chr0:0000-0000" style. The first
    such occurrence in ``s`` is used; any surrounding text is ignored.

    Args:
        s: The string to search, e.g. a FASTA header line.

    Returns:
        A ``(chromosome, start, end)`` tuple. All three items are strings;
        ``start`` and ``end`` are NOT converted to integers.

    Raises:
        ValueError: If ``s`` contains no "chr0:0000-0000" style coordinates
            (the raised exception is also an AttributeError).

    >>> get_chr_coordinates_from_string("range=chr4:100-200 strand=+")
    ('chr4', '100', '200')
    """
    match = _CHR_COORDINATES_RE.search(s)
    if match is None:
        raise _CoordinatesNotFoundError(
            'no "chr0:0000-0000" style coordinates found in %r' % (s,)
        )
    # Passing several group names returns them as one tuple, in order.
    return match.group('chrom', 'start', 'end')
>>> s=">hg19_refGene_NM_012147 range=chr4:191008763-191010142 5'pad=0 3'pad=0 strand=+ repeatMasking=none"
>>> get_chr_coordinates_from_string(s)
('chr4', '191008763', '191010142')
Code highlighted using http://pygments.org/
No comments:
Post a Comment