Skip to content
GitLab
Explore
Sign in
Commits on Source (4)
Improvements to chapter bundling mechanisms
· e69c18a2
Hermann Krumrey
authored
Sep 29, 2019
e69c18a2
Version bump to 0.3.0
· d8feb9d2
Hermann Krumrey
authored
Sep 30, 2019
d8feb9d2
Fix type errors
· cec99fbb
Hermann Krumrey
authored
Sep 30, 2019
cec99fbb
Merge branch 'develop' into 'master'
· 5f652191
Hermann Krumrey
authored
Sep 30, 2019
Develop See merge request namibsun/python/manga-dl!2
5f652191
Show whitespace changes
Inline
Side-by-side
CHANGELOG
View file @
5f652191
V 0.3.0:
- Various fixes to the mechanism that bundles chapters
V 0.2.0:
- Added mangadex support
- Redid application structure
...
...
bin/manga-dl
View file @
5f652191
...
...
@@ -54,6 +54,9 @@ def main(args: argparse.Namespace):
))
for c in chapters:
if args.list:
print(c)
else:
c.download()
...
...
@@ -72,6 +75,8 @@ if __name__ == "__main__":
parser.add_argument("-f", "--format",
choices={"cbz", "raw"}, default="cbz",
help="The format in which to store the chapters")
parser.add_argument("-l", "--list", action="store_true",
help="Lists all found chapters")
for _scraper_cls in scrapers:
parser.add_argument("--{}-id".format(_scraper_cls.name()),
...
...
manga_dl/entities/Chapter.py
View file @
5f652191
...
...
@@ -42,7 +42,8 @@ class Chapter:
destination_dir
:
str
,
_format
:
str
,
page_load_callback
:
Callable
[[
'
Chapter
'
,
str
],
List
[
str
]],
title
:
Optional
[
str
]
=
None
title
:
Optional
[
str
]
=
None
,
group
:
Optional
[
str
]
=
None
):
"""
Initializes the manga chapter
...
...
@@ -55,6 +56,7 @@ class Chapter:
:param _format: The format in which to store the chapter when
downloading by default
:param title: The title of the chapter
:param group: The group that scanlated this chapter
:param page_load_callback:
"""
self
.
logger
=
logging
.
getLogger
(
self
.
__class__
.
__name__
)
...
...
@@ -66,10 +68,13 @@ class Chapter:
self
.
format
=
_format
self
.
_page_load_callback
=
page_load_callback
self
.
_pages
=
[]
# type: List[str]
self
.
_additional_urls
=
[]
# type: List[str]
self
.
_last_additional_urls
=
[]
# type: List[str]
self
.
group
=
group
self
.
title
=
title
if
self
.
chapter_number
==
""
:
self
.
chapter_number
=
"
0
"
if
self
.
chapter_number
==
""
or
chapter_number
==
"
0
"
:
self
.
chapter_number
=
"
0
.0
"
@property
def
name
(
self
)
->
str
:
...
...
@@ -77,8 +82,10 @@ class Chapter:
:return: The name of the chapter
"""
name
=
"
{} - Chapter {}
"
.
format
(
self
.
series_name
,
self
.
chapter_number
)
if
self
.
title
is
not
None
:
if
self
.
title
is
not
None
and
self
.
title
!=
""
:
name
+=
"
-
"
+
self
.
title
if
self
.
group
is
not
None
and
self
.
group
!=
""
:
name
+=
"
({})
"
.
format
(
self
.
group
)
return
name
@property
...
...
@@ -87,10 +94,72 @@ class Chapter:
Lazy-loads the URLs of the chapter
'
s page images
:return: The list of page images, in the correct order
"""
if
len
(
self
.
_pages
)
==
0
:
new_urls
=
self
.
_last_additional_urls
!=
self
.
_additional_urls
if
len
(
self
.
_pages
)
==
0
or
new_urls
:
self
.
_pages
=
self
.
_page_load_callback
(
self
,
self
.
url
)
for
url
in
self
.
_additional_urls
:
self
.
_pages
+=
self
.
_page_load_callback
(
self
,
url
)
self
.
_last_additional_urls
=
list
(
self
.
_additional_urls
)
return
self
.
_pages
@property
def macro_chapter(self) -> int:
    """
    Calculates the 'macro' chapter number. For example:
        12 -> 12
        15.5 -> 15
        EX4 -> 4
    A chapter number whose part in front of the first "." contains no
    decimal digit at all (e.g. "Extra") yields 0 instead of raising
    a ValueError
    :return: The macro chapter number
    """
    macro = self.chapter_number.split(".")[0]
    # str.isdecimal() only accepts characters that int() is able to parse.
    # str.isnumeric() additionally accepts characters like "½" or "²",
    # which would make the int() call below raise a ValueError
    macro_num = "".join(char for char in macro if char.isdecimal())
    # int("") raises a ValueError, so fall back to 0 if no digit was found
    return int(macro_num) if macro_num else 0
@property
def micro_chapter(self) -> int:
    """
    Calculates the 'micro' chapter number. For example:
        12 -> 0
        15.5 -> 5
        EX4 -> 0
    If the part after the first "." contains no decimal digit
    (e.g. "12." or "12.a"), 0 is returned instead of raising a ValueError
    :return: The micro chapter number
    """
    parts = self.chapter_number.split(".")
    if len(parts) < 2:
        # No "." in the chapter number, so there is no micro part
        return 0

    # Only the segment directly after the first "." is considered.
    # str.isdecimal() is used since int() can't parse every character
    # that str.isnumeric() accepts (e.g. "½")
    micro_num = "".join(char for char in parts[1] if char.isdecimal())
    # int("") raises a ValueError, so fall back to 0 if no digit was found
    return int(micro_num) if micro_num else 0
@property
def is_special(self) -> bool:
    """
    Checks if this chapter counts as a 'special' chapter (Omake etc).
    That is the case if the chapter number contains a ".", if the
    macro chapter number is 0 or if the chapter number as a whole
    can't be parsed as an integer
    :return: True if the chapter is a special chapter, False otherwise
    """
    number = self.chapter_number

    if "." in number or self.macro_chapter == 0:
        return True

    try:
        int(number)
    except ValueError:
        # Something like "EX4": has digits, but is no plain integer
        return True
    return False
def add_additional_url(self, url: str):
    """
    Registers one more URL for this chapter by putting it at the end
    of the list of additional URLs.
    Meant for chapters that are split into multiple parts
    :param url: The additional URL
    :return: None
    """
    self._additional_urls.extend([url])
def
download
(
self
,
file_path_override
:
Optional
[
str
]
=
None
,
...
...
@@ -158,3 +227,14 @@ class Chapter:
:return: The string representation of the object
"""
return
self
.
name
def __eq__(self, other: object) -> bool:
    """
    Checks for equality with other objects.
    Two chapters are considered equal if they have the same URL
    :param other: The other object
    :return: Whether or not the objects are the same
    """
    return isinstance(other, Chapter) and other.url == self.url

def __hash__(self) -> int:
    """
    Generates a hash that is consistent with __eq__.
    Python sets __hash__ to None for classes that define __eq__ without
    defining __hash__, which would make chapters unusable in sets and
    as dictionary keys.
    The URL must not be changed while the chapter is stored in a set
    or used as a dictionary key
    :return: The hash of the chapter's URL
    """
    return hash(self.url)
manga_dl/scrapers/Scraper.py
View file @
5f652191
...
...
@@ -18,7 +18,7 @@ along with manga-dl. If not, see <http://www.gnu.org/licenses/>.
LICENSE
"""
import
logging
from
typing
import
Optional
,
List
,
Set
from
typing
import
Optional
,
List
,
Set
,
Dict
from
manga_dl.entities.Chapter
import
Chapter
...
...
@@ -94,7 +94,21 @@ class Scraper:
url
=
self
.
generate_url
(
_id
)
chapters
=
self
.
_load_chapters
(
str
(
url
))
chapters
=
self
.
_remove_other_languages
(
chapters
)
chapters
=
self
.
_sort_chapters
(
chapters
)
chapters
=
self
.
_deduplicate_chapters
(
chapters
)
chapters
=
self
.
_combine_multipart_chapters
(
chapters
)
return
chapters
@staticmethod
def
_sort_chapters
(
chapters
:
List
[
Chapter
])
->
List
[
Chapter
]:
"""
Sorts a list of chapters. First by their total chapter number,
then their macro chapter number
:param chapters:
:return:
"""
# Both sort steps are necessary!
chapters
.
sort
(
key
=
lambda
x
:
str
(
x
.
chapter_number
).
zfill
(
15
)
...
...
@@ -102,8 +116,134 @@ class Scraper:
chapters
.
sort
(
key
=
lambda
x
:
str
(
x
.
chapter_number
.
split
(
"
.
"
)[
0
]).
zfill
(
15
)
)
return
chapters
def _remove_other_languages(self, chapters: List[Chapter]) \
        -> List[Chapter]:
    """
    Filters a chapter list so that only chapters remain whose language
    is part of this scraper's languages
    :param chapters: The chapters to filter
    :return: A new list containing only the chapters in wanted languages
    """
    return [
        entry for entry in chapters
        if entry.language in self.languages
    ]
def _combine_multipart_chapters(self, chapters: List[Chapter]) \
        -> List[Chapter]:
    """
    Combines multipart chapters with each other (e.g. 12.1 and 12.2)
    Walks through the list once and compares every chapter with
    `last_chapter`, the chapter that currently acts as the potential
    first part. Chapters identified as follow-up parts are collected in
    `to_combine` and handed to `_combine_chapters` once a chapter shows
    up that is not a follow-up part.
    NOTE: The first element is removed from the passed list using pop(0),
    lists with fewer than 2 elements are returned unchanged.
    NOTE(review): the consecutive-part detection presumably relies on the
    list being sorted beforehand - confirm against the caller
    :param chapters: The list of chapters to work through
    :return: The new chapter list
    """
    if len(chapters) < 2:
        return chapters

    last_chapter = chapters.pop(0)
    combined_chapters = []  # type: List[Chapter]
    to_combine = []  # type: List[Chapter]
    # Expected distance between the micro chapter number of last_chapter
    # and the micro chapter number of the next part
    diff = 1

    for chapter in chapters:

        # A chapter x.1 that follows a different macro chapter is
        # renamed to plain x
        new_chapter = last_chapter.macro_chapter != chapter.macro_chapter
        if chapter.micro_chapter == 1 and new_chapter:
            self.logger.debug("Marking chapter {} as {}".format(
                chapter.chapter_number, chapter.macro_chapter
            ))
            chapter.chapter_number = str(chapter.macro_chapter)

        if last_chapter.macro_chapter == chapter.macro_chapter:

            same_chapter = \
                last_chapter.micro_chapter + diff == chapter.micro_chapter

            # Special case: x (micro chapter 0) directly followed by x.2
            # is treated as the same chapter as well
            if last_chapter.micro_chapter == 0 \
                    and chapter.micro_chapter == 2:
                same_chapter = True
                diff = 2

            if same_chapter:
                # Remember the part, last_chapter stays the same
                to_combine.append(chapter)
                diff += 1
                continue

        # The current chapter is not a follow-up part.
        # Collected parts are only merged if last_chapter has the micro
        # chapter number 0 or 1
        if len(to_combine) > 0 and last_chapter.micro_chapter in [0, 1]:
            self._combine_chapters(last_chapter, to_combine)
            to_combine = []
            diff = 1

        combined_chapters.append(last_chapter)
        # Parts that were not merged are kept as standalone chapters
        combined_chapters += to_combine
        to_combine = []
        last_chapter = chapter

    # Same handling as inside the loop for the last pending chapter
    if len(to_combine) > 0 and last_chapter.micro_chapter in [0, 1]:
        self._combine_chapters(last_chapter, to_combine)
        to_combine = []

    combined_chapters.append(last_chapter)
    combined_chapters += to_combine

    return combined_chapters
def _combine_chapters(self, chapter: Chapter, to_combine: List[Chapter]):
    """
    Merges a list of chapters into one master chapter.
    The master chapter's number is set to its macro chapter number and
    the URL of every merged chapter is added as an additional URL
    :param chapter: The master chapter that receives the URLs
    :param to_combine: The chapters whose URLs are added to the master
    :return: None
    """
    # Remember the original number before it gets overwritten
    merged_numbers = [chapter.chapter_number]
    chapter.chapter_number = str(chapter.macro_chapter)

    for part in to_combine:
        chapter.add_additional_url(part.url)
    merged_numbers.extend(part.chapter_number for part in to_combine)

    self.logger.debug("Combined chapters: {}".format(merged_numbers))
def _deduplicate_chapters(self, chapters: List[Chapter]) -> List[Chapter]:
    """
    Removes duplicate chapters from a list
    The chapter to use is based on which scanlation group was most often
    found in the other chapters
    Lists with fewer than 2 elements are returned unchanged
    :param chapters: The chapters to work through
    :return: The deduplicated list of chapters
    """
    if len(chapters) < 2:
        return chapters

    groups = {}  # type: Dict[str, int]
    chapter_map = {}  # type: Dict[str, List[Chapter]]

    for chapter in chapters:
        # The counter has to be looked up with the same stringified key
        # that is used to store it. Checking the raw group (e.g. None)
        # against the str keys never matches, which would reset the
        # counter of that group to 1 over and over again
        group_key = str(chapter.group)
        groups[group_key] = groups.get(group_key, 0) + 1

        chapter_map.setdefault(chapter.chapter_number, []).append(chapter)

    # Reduce every chapter number with multiple candidates to the one
    # whose group occurs most often in the whole list.
    # Only existing keys are reassigned, so the dict size doesn't change
    # during iteration
    for chapter_number, elements in chapter_map.items():
        if len(elements) > 1:
            best = max(elements, key=lambda x: groups[str(x.group)])
            chapter_map[chapter_number] = [best]

    # Iterate over the original list to keep the original order
    deduplicated = []  # type: List[Chapter]
    for chapter in chapters:
        best_chapter = chapter_map[chapter.chapter_number][0]

        if best_chapter == chapter:
            deduplicated.append(chapter)
        else:
            self.logger.debug(
                "Discarding duplicate chapter {}".format(chapter)
            )

    return deduplicated
def
_load_chapters
(
self
,
url
:
str
)
->
List
[
Chapter
]:
"""
Scraper-specific implementation that loads chapters from the website
...
...
manga_dl/scrapers/mangadex.py
View file @
5f652191
...
...
@@ -95,6 +95,7 @@ class MangaDexScraper(Scraper):
self
.
format
,
self
.
get_image_pages
,
chapter
[
"
title
"
],
chapter
[
"
group_name
"
]
))
return
chapters
...
...
version
View file @
5f652191
0.2.0
\ No newline at end of file
0.3.0
\ No newline at end of file