Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Lesson 8: String methods notebook

my_str = 'The Dude abides.'

print("1", my_str[5])
print("2", my_str[:6])
print("3", my_str[::2])
print("4", my_str[::-1])
print("5", my_str[::-2])
1 u
2 The Du
3 TeDd bds
4 .sediba eduD ehT
5 .eiaeu h
def bit_of_string(my_str, start=0, end=None, step=1):
    """Return the slice ``my_str[start:end:step]`` and report what was sliced.

    Args:
        my_str: The sequence to slice.
        start: Index at which the slice begins (default 0).
        end: Index at which the slice stops. When omitted (``None``) it is
            the length of the ``my_str`` argument itself.
        step: Stride of the slice (default 1).

    Returns:
        The requested slice of ``my_str``.
    """
    # Default arguments are evaluated once, when the function is defined, so
    # a default of ``len(my_str)`` would measure whatever global ``my_str``
    # existed at that moment rather than the string actually passed in.
    # Resolve the default per call instead.
    if end is None:
        end = len(my_str)
    print(f"returning my_str[{start}:{end}:{step}]")
    return my_str[start:end:step]
print("a", bit_of_string(my_str, step=2))
returning my_str[0:16:2]
a TeDd bds

String Methods

# Define sequence
seq = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'

# Count G's and C's
seq.count('G') + seq.count('C')
16
my_name = 'Peter'
print(my_name.count('e'))
print('count the e characters in a literal',
      'Peter'.count('e'))
2
count the e characters in a literal 2
# methods are available based on the type of the value
# you're working with
my_num = 102
print(my_num.count(0))
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[5], line 4
      1 # methods are available based on the type of the value
      2 # you're working with
      3 my_num = 102
----> 4 print(my_num.count(0))

AttributeError: 'int' object has no attribute 'count'
# explaining *parameters aka *args
def my_print(prefix, *parameters):
    """Print ``prefix`` plus a colon, then every extra argument.

    Each extra positional argument is collected by ``*parameters`` and is
    written followed by a single space, so the printed line ends with a
    trailing space whenever at least one extra argument is given.
    """
    pieces = [prefix + ':']
    for word in parameters:
        pieces.append(word + ' ')
    print(''.join(pieces))

my_print('Hello', 'world', 'today')
Hello:world today 
def gc_content(dna_string):
    """Return the fraction of characters in ``dna_string`` that are G or C.

    Only upper-case 'G' and 'C' are counted.

    Args:
        dna_string: The nucleotide sequence to examine.

    Returns:
        A float between 0 and 1; 0.0 for an empty sequence.
    """
    length = len(dna_string)
    # An empty sequence has no bases to count; return 0.0 instead of
    # dividing by zero.
    if length == 0:
        return 0.0
    gc_count = dna_string.count('G') + dna_string.count('C')
    return gc_count / length

my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
print('gc content of my_dna', gc_content(my_dna))
gc content of my_dna 0.5333333333333333
# more with .count()
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
print('count GA', my_dna.count('GA'))

# count with a substring again: .count() only counts non-overlapping matches
print('count AA in AAAAAA', 'AAAAAA'.count('AA'))
count GA 2
count AA in AAAAAA 3
# find
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna.find('AUG')
10
print('look for nonsense', my_dna.find('nonsense'))
look for nonsense -1
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
if my_dna.find(pattern) == -1:
    print(pattern, 'not found')
else:
    print(pattern, 'found at', my_dna.find(pattern))
AUG found at 10
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
if pattern not in my_dna:
    print(pattern, 'not found')
else:
    print(pattern, 'found at', my_dna.find(pattern))
AUG found at 10
my_dna.find?
# find
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
pattern_length = len(pattern)

# Show every index next to its letter so the positions below can be checked.
for i, letter in enumerate(my_dna):
    print(i, letter)

found_at = my_dna.find(pattern)
print('AUG found at', found_at)

# look for the next 'AUG'
# Pass the start index to .find() instead of searching a slice: the result is
# then already an index into my_dna, and a missing pattern stays -1 rather
# than being shifted into a misleading position by adding the offset.
start_search_at = found_at + pattern_length
found_next = my_dna.find(pattern, start_search_at)
print('AUG found next at', found_next)
0 G
1 A
2 C
3 A
4 G
5 A
6 C
7 U
8 C
9 C
10 A
11 U
12 G
13 C
14 A
15 C
16 G
17 U
18 G
19 G
20 G
21 U
22 A
23 U
24 C
25 A
26 U
27 G
28 U
29 C
AUG found at 10
AUG found next at 25
# rfind
my_dna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
pattern = 'AUG'
print(my_dna.rfind(pattern))
25
# reverse complement again
def complement_base(base):
    """Return the Watson-Crick complement of a base, in upper case.

    The lookup ignores the case of ``base``. Anything other than
    'a', 't' or 'g' (after lower-casing) yields 'G'.
    """
    partners = {'a': 'T', 't': 'A', 'g': 'C'}
    return partners.get(base.lower(), 'G')
# .lower() and .upper()
my_name = 'Peter'
print(my_name.lower())
print(my_name.upper())
peter
PETER
# .replace
my_rna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna = my_rna.replace('U', 'T')
print('my_rna', my_rna)
print('my_dna', my_dna)
my_rna GACAGACUCCAUGCACGUGGGUAUCAUGUC
my_dna GACAGACTCCATGCACGTGGGTATCATGTC
my_rna = 'GACAGACUCCAUGCACGUGGGUAUCAUGUC'
my_dna = my_rna.replace('U', 'T')

def complement_seq(my_seq):
    """Return the upper-case complement of a DNA sequence.

    The sequence is upper-cased, then G<->C and T<->A are exchanged in a
    single pass. Characters other than G, C, T and A are left in place.
    """
    swap_table = str.maketrans('GCTA', 'CGAT')
    swapped = my_seq.upper().translate(swap_table)
    # Upper-case once more so the result is cased exactly as before.
    return swapped.upper()

print('my_dna              ', id(my_dna), my_dna)
my_dna = complement_seq(my_dna)
print('complement of my_dna', id(my_dna), my_dna)
my_dna               139504722109792 GACAGACTCCATGCACGTGGGTATCATGTC
complement of my_dna 139504722184432 CTGTCTGAGGTACGTGCACCCATAGTACAG
# .join()
'|'.join(['A','B','C'])
'A|B|C'
#.join again
'|'.join('ABC')
'A|B|C'
'one'.join('ABC')
'AoneBoneC'
# .split()
my_line = 'AC1354 1.2 654'
# split to list
data = my_line.split(' ')
print(data)
['AC1354', '1.2', '654']
data = my_line.split()
print(data)
['AC1354', '1.2', '654']
my_line2 = 'AC1354  1.2 654'
data = my_line2.split()
print(data)
['AC1354', '1.2', '654']
my_line2 = 'AC1354  1.2 654'
data = my_line2.split(' ')
print(data)
['AC1354', '', '1.2', '654']
# .join it
print(' '.join(data))
AC1354  1.2 654

Format and f-strings

my_str = """
Let's do a Mad Lib!
During this bootcamp, I feel {adjective}.
The instructors give us {plural_noun}.
""".format(adjective='truculent', plural_noun='haircuts')

print(my_str)

Let's do a Mad Lib!
During this bootcamp, I feel truculent.
The instructors give us haircuts.

distance = 113
town_name = 'Ceres'
my_str = 'The distance to {town} is {distance:04d} km'.format(town=town_name,
                                                              distance=distance)
print(my_str)
The distance to Ceres is 0113 km
first_name = 'Peter'
surname = 'van Heusden'
my_str = 'first name: {} surname: {}'.format(first_name, surname)
print(my_str)
first name: Peter surname: van Heusden
# f-string
first_name = 'Peter'
surname = 'van Heusden'
my_str = f'first name: {firstname} surname: {surname}'
print(my_str)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[31], line 4
      2 first_name = 'Peter'
      3 surname = 'van Heusden'
----> 4 my_str = f'first name: {firstname} surname: {surname}'
      5 print(my_str)

NameError: name 'firstname' is not defined
# f-string
first_name = 'Peter'
surname = 'van Heusden'
my_str = f'first name: {first_name} surname: {surname}'
print(my_str)
first name: Peter surname: van Heusden
# old school way - don't use, but you might find it in other code
my_str = 'first name: %s' % first_name
print(my_str)
first name: Peter