Skip to content

Commit

Permalink
Merge pull request #185 from dod-advana/hotfix/samm_spider
Browse files Browse the repository at this point in the history
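Changed display_doc_type for the SAMM docs: chapters now use "Document" and policy memoranda use "Memorandum" (both were previously "SAMM").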
  • Loading branch information
vat99 committed Aug 10, 2023
2 parents 3ae68b1 + 9ca3efd commit e873a08
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dataPipelines/gc_scrapy/gc_scrapy/spiders/samm_spider.py
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def parse_document_page_chapters(self, response, chapter, chapter_title):

doc_num = chapter
doc_type = "SAMM"
- display_doc_type = "SAMM"
+ display_doc_type = "Document"
doc_name = " ".join([doc_type, chapter])
doc_name = doc_name.replace(" ", "_")

@@ -107,7 +107,7 @@ def parse_document_page_memos(self, response, doc_title, doc_name, publication_d
doc_type = "SAMM Policy Memoranda"

doc_name = self.ascii_clean(doc_name.replace(" ", "_"))
- display_doc_type = "SAMM"
+ display_doc_type = "Memorandum"

doc_name = re.sub(r'[\(\),]', '', doc_name) # Remove parentheses and commas
doc_name = re.sub(r'[\W_\.]+$', '', doc_name) # Remove any special characters at the end
Expand Down

0 comments on commit e873a08

Please sign in to comment.