my_str = 'The Dude abides.'
print("1", my_str[5])
print("2", my_str[:6])
print("3", my_str[::2])
print("4", my_str[::-1])
print("5", my_str[::-2])1 u
2 The Du
3 TeDd bds
4 .sediba eduD ehT
5 .eiaeu h
def bit_of_string(my_str, start=0, end=None, step=1):
    """Return the slice ``my_str[start:end:step]`` and report which slice it is.

    Args:
        my_str: The string to slice.
        start: Index of the first character to include.
        end: Index at which to stop (exclusive). ``None`` (the default)
            means "the length of the string passed in".
        step: Stride between the characters taken.

    Returns:
        The requested piece of ``my_str``.
    """
    # Default arguments are evaluated once, when ``def`` runs. Writing
    # ``end=len(my_str)`` in the signature would freeze the length of
    # whatever global ``my_str`` existed at that moment, so the length is
    # worked out here from the string actually passed in.
    if end is None:
        end = len(my_str)
    print(f"returning my_str[{start}:{end}:{step}]")
    return my_str[start:end:step]
print("a", bit_of_string(my_str, step=2))returning my_str[0:16:2]
a TeDd bds
String Methods
# Define sequence
seq = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
# Count G's and C's
seq.count('G') + seq.count('C')16my_name = 'Peter'
print(my_name.count('e'))
print('count the e characters in a literal',
'Peter'.count('e'))2
count the e characters in a literal 2
# methods are available based on the type of the value
# you're working with
my_num = 102
print(my_num.count(0))---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[5], line 4
1 # methods are available based on the type of the value
2 # you're working with
3 my_num = 102
----> 4 print(my_num.count(0))
AttributeError: 'int' object has no attribute 'count'# explaining *parameters aka *args
def my_print(prefix, *parameters):
    """Print ``prefix``, a colon, then each extra argument followed by a space.

    ``*parameters`` collects every positional argument after ``prefix`` into
    a tuple, so the function can be called with any number of them.

    Args:
        prefix: Text printed before the colon.
        *parameters: Values printed after the colon. Each one is followed by
            a single space (so the line ends with a space when at least one
            is given). Non-string values are converted with ``str()``.
    """
    # Formatting through an f-string lets numbers and other objects be
    # printed too, and join() avoids growing a string with += in a loop.
    pieces = [f"{param} " for param in parameters]
    print(f"{prefix}:" + "".join(pieces))
my_print('Hello', 'world', 'today')Hello:world today
def gc_content(dna_string):
    """Return the fraction of characters in ``dna_string`` that are G or C.

    The count is case-insensitive, so ``'gc'`` and ``'GC'`` give the same
    answer.

    Args:
        dna_string: Nucleotide sequence as a string.

    Returns:
        A float between 0.0 and 1.0. An empty sequence yields 0.0 rather
        than raising ``ZeroDivisionError``.
    """
    length = len(dna_string)
    if length == 0:
        # Nothing to count; avoid dividing by zero.
        return 0.0
    normalised = dna_string.upper()
    gc_count = normalised.count('G') + normalised.count('C')
    return gc_count / length
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
print('gc content of my_dna', gc_content(my_dna))gc content of my_dna 0.5333333333333333
# more with .count()
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
print('count GA', my_dna.count('GA'))
# count with substring again
print('count AA in AAAAAA', 'AAAAAA'.count('AA'))count GA 2
count AA in AAAAAA 3
# find
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna.find('AUG')10print('look for nonsense', my_dna.find('nonsense'))look for nonsense -1
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
if my_dna.find(pattern) == -1:
print(pattern, 'not found')
else:
print(pattern, 'found at', my_dna.find(pattern))AUG found at 10
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
if pattern not in my_dna:
print(pattern, 'not found')
else:
print(pattern, 'found at', my_dna.find(pattern))AUG found at 10
my_dna.find?# find
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
for i, letter in enumerate(my_dna):
print(i, letter)
found_at = my_dna.find('AUG')
print('AUG found at', found_at)
# look for the next 'AUG'
pattern = 'AUG'
pattern_length = len(pattern)
start_search_at = found_at + pattern_length
found_next = start_search_at + my_dna[start_search_at:].find(pattern)
print('AUG found next at', found_next)0 G
1 A
2 C
3 A
4 G
5 A
6 C
7 U
8 C
9 C
10 A
11 U
12 G
13 C
14 A
15 C
16 G
17 U
18 G
19 G
20 G
21 U
22 A
23 U
24 C
25 A
26 U
27 G
28 U
29 C
AUG found at 10
AUG found next at 25
# rfind
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
print(my_dna.rfind(pattern))25
# reverse complement again
def complement_base(base):
    """Returns the Watson-Crick complement of a DNA base.

    Args:
        base: One of 'A', 'T', 'G' or 'C', in either case.

    Returns:
        The complementary base as an upper-case letter.

    Raises:
        ValueError: If ``base`` is not one of the four DNA bases. A bare
            ``else`` branch would otherwise report 'G' for any unexpected
            input (for example 'U' or 'N').
    """
    complements = {'a': 'T', 't': 'A', 'g': 'C', 'c': 'G'}
    # Convert to lowercase so the lookup is case-insensitive
    key = base.lower()
    if key not in complements:
        raise ValueError(f"not a DNA base: {base!r}")
    return complements[key]
# .lower() and .upper()
my_name = 'Peter'
print(my_name.lower())
print(my_name.upper())peter
PETER
# .replace
my_rna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna = my_rna.replace('U', 'T')
print('my_rna', my_rna)
print('my_dna', my_dna)my_rna GACAGACUCCAUGCACGUGGGUAUCAUGUC
my_dna GACAGACTCCATGCACGTGGGTATCATGTC
my_rna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna = my_rna.replace('U', 'T')
def complement_seq(my_seq):
    """Return the upper-case DNA complement of ``my_seq``.

    Every G, C, T and A (in either case) is swapped for its partner; any
    other character is only upper-cased. The sequence is NOT reversed.
    Strings are immutable, so a new string is returned and the argument is
    left untouched.

    Args:
        my_seq: DNA sequence as a string.

    Returns:
        The complemented sequence in upper case.
    """
    # A translation table swaps all four bases in a single pass, so there is
    # no risk of a later replacement undoing an earlier one.
    swap_bases = str.maketrans('GCTA', 'CGAT')
    return my_seq.upper().translate(swap_bases)
print('my_dna ', id(my_dna), my_dna)
my_dna = complement_seq(my_dna)
print('complement of my_dna', id(my_dna), my_dna)my_dna 139504722109792 GACAGACTCCATGCACGTGGGTATCATGTC
complement of my_dna 139504722184432 CTGTCTGAGGTACGTGCACCCATAGTACAG
# .join()
'|'.join(['A','B','C'])'A|B|C'#.join again
'|'.join('ABC')'A|B|C''one'.join('ABC')'AoneBoneC'# .split()
my_line = 'AC1354 1.2 654'
# split to list
data = my_line.split(' ')
print(data)['AC1354', '1.2', '654']
data = my_line.split()
print(data)['AC1354', '1.2', '654']
my_line2 = 'AC1354  1.2 654'
data = my_line2.split()
print(data)['AC1354', '1.2', '654']
my_line2 = 'AC1354  1.2 654'
data = my_line2.split(' ')
print(data)['AC1354', '', '1.2', '654']
# .join it
print(' '.join(data))AC1354  1.2 654
Format and f-strings
my_str = """
Let's do a Mad Lib!
During this bootcamp, I feel {adjective}.
The instructors give us {plural_noun}.
""".format(adjective='truculent', plural_noun='haircuts')
print(my_str)
Let's do a Mad Lib!
During this bootcamp, I feel truculent.
The instructors give us haircuts.
distance = 113
town_name = 'Ceres'
my_str = 'The distance to {town} is {distance:04d} km'.format(town=town_name,
distance=distance)
print(my_str)The distance to Ceres is 0113 km
first_name = 'Peter'
surname = 'van Heusden'
my_str = 'first name: {} surname: {}'.format(first_name, surname)
print(my_str)first name: Peter surname: van Heusden
# f-string
first_name = 'Peter'
surname = 'van Heusden'
my_str = f'first name: {firstname} surname: {surname}'
print(my_str)---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[31], line 4
2 first_name = 'Peter'
3 surname = 'van Heusden'
----> 4 my_str = f'first name: {firstname} surname: {surname}'
5 print(my_str)
NameError: name 'firstname' is not defined# f-string
first_name = 'Peter'
surname = 'van Heusden'
my_str = f'first name: {first_name} surname: {surname}'
print(my_str)first name: Peter surname: van Heusden
# old school way - don't use, but you might find it in other code
my_str = 'first name: %s' % first_name
print(my_str)first name: Peter