Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Question 1

Question 1

# Question 1: count the whitespace-separated words in a one-line record.
line1 = 'QR57613 1.3 Serpentes Pythonidae'
# split() with no argument breaks the string on runs of whitespace
parts = line1.split()
# the number of words is the length of the resulting list
num_words = len(parts)
print(num_words)
4

Question 2

# Question 2: count the fields of a tab-delimited record.
line2 = 'QR57613\t1.3\tSerpentes\tPythonidae\tPython regius'
# Name the delimiter once; splitting on it keeps 'Python regius' (which
# contains a space, not a tab) together as a single field.
tab = '\t'
fields = line2.split(tab)
num_fields = len(fields)
print(num_fields)
5
# split() also accepts a separator that is longer than one character.
line3 = 'ABC^^DEF^^GHI'
separator = '^^'
print(line3.split(separator))
['ABC', 'DEF', 'GHI']

Question 3

# Sample report text: two banner lines, one tab-separated column-header
# line, then four tab-separated result rows.
# The \t escapes are interpreted (this is not a raw string), so the value
# holds real tab characters; the line breaks inside the triple quotes
# become newline characters.
findhom_result = """#FINDHOM v 1.2:
Search results:
Query\tMatch fraction\tScore\tSubject
SMPL001\t0.7\t12331\tAQ10213 Phlebotomus perniciosus
SMPL003\t0.5\t6032\tBZ102363 Phlebotomus papatasi
SMPL004\t0.8\t13123\tRD178237 Sergentomyia dubia
SMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi"""
# bare expression: as the last line of a notebook cell this echoes the
# string's repr, with tabs and newlines shown as \t and \n escapes
findhom_result
'#FINDHOM v 1.2:\nSearch results:\nQuery\tMatch fraction\tScore\tSubject\nSMPL001\t0.7\t12331\tAQ10213 Phlebotomus perniciosus\nSMPL003\t0.5\t6032\tBZ102363 Phlebotomus papatasi\nSMPL004\t0.8\t13123\tRD178237 Sergentomyia dubia\nSMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi'
# Break the report into a list holding one string per line, then show each.
lines = findhom_result.split('\n')  # split on newline
for line in lines:
    # print() puts a single space between "LINE:" and the line text
    print("LINE:", line)
LINE: #FINDHOM v 1.2:
LINE: Search results:
LINE: Query	Match fraction	Score	Subject
LINE: SMPL001	0.7	12331	AQ10213 Phlebotomus perniciosus
LINE: SMPL003	0.5	6032	BZ102363 Phlebotomus papatasi
LINE: SMPL004	0.8	13123	RD178237 Sergentomyia dubia
LINE: SMPL007	0.6	10610	BQ187981 Phlebotomus papatasi
# index -1 selects the last element of the list; echoed as a repr, so the
# tab characters appear as \t escapes
lines[-1]
'SMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi'
# print() writes the string itself, so the tabs are rendered as whitespace
print(lines[-1])
SMPL007	0.6	10610	BQ187981 Phlebotomus papatasi

Question 4

string1 = 'Mountain Goat'
string2 = 'Field Goat'

# Run the same prefix test on both strings; each line shows the boolean
# result followed by the prefix that was tested.
# (str.endswith() is the matching check for the end of a string.)
prefix = 'Mountain'
for candidate in (string1, string2):
    print(candidate.startswith(prefix), prefix)

# Substring membership: does the word Mountain occur anywhere in string1?
got_mountain = 'Mountain' in string1
print(got_mountain)
True Mountain
False Mountain
True
total = 0
# convert findhom_result into lines
lines = findhom_result.__A____
# search through all of the lines
___B____
    # check if the line starts with 'SMPL'
    if __C___
        total += 1
print("total count of 'SMPL'", total)
  Cell In[10], line 7
    if __C___
    ^
IndentationError: unexpected indent
# Count how many lines of findhom_result start with 'SMPL'.
total = 0
# convert findhom_result into lines
lines = findhom_result.split('\n')
# search through all of the lines
for line in lines:
    # echo every line so the final count can be checked by eye
    print("LINE:", line)
    # check if the line starts with 'SMPL'
    if line.startswith('SMPL'):
        total += 1
print("total count of 'SMPL'", total)
LINE: #FINDHOM v 1.2:
LINE: Search results:
LINE: Query	Match fraction	Score	Subject
LINE: SMPL001	0.7	12331	AQ10213 Phlebotomus perniciosus
LINE: SMPL003	0.5	6032	BZ102363 Phlebotomus papatasi
LINE: SMPL004	0.8	13123	RD178237 Sergentomyia dubia
LINE: SMPL007	0.6	10610	BQ187981 Phlebotomus papatasi
total count of 'SMPL' 4

Question 5

# version 1: count the lines that contain a tab character
lines = findhom_result.split('\n')
tabbed_lines = [line for line in lines if '\t' in line]
# the column-header line contains tabs too, so take one off the count
result_count = len(tabbed_lines) - 1
print(result_count)
4
# version 2
lines = findhom_result.split('\n')
# count all of the lines that have exactly 4 tab-separated fields
result_count = sum(1 for line in lines if len(line.split('\t')) == 4)
# the column-header line also has 4 fields, so take one off the count
result_count -= 1
print(result_count)
4
# State-machine solution with two states: header not yet found / header found.
count = 0
header_found = False
lines = findhom_result.split('\n')
for line in lines:
    if line.startswith("Query"):
        # header line: switch state, but do not count the header itself
        header_found = True
    elif header_found:
        # every other line seen once the header has been found is a result
        count += 1

print(count)
4
# Same two-state machine, written so the header line is never counted
# because the state only changes after the counting decision for that line.
count = 0
header_found = False
lines = findhom_result.split('\n')
for line in lines:
    if not header_found:
        # still looking for the header: this line is not a result, but it
        # may be the header that switches the state for the lines after it
        header_found = line.startswith("Query")
        continue
    # header already seen on an earlier line, so this line is counted
    count += 1

print(count)
4